RNA-seq report for patient sample CCR180038_SV18T002P006_RNA.
##### We attempt to structure the script in the following way:
# 1. Defining functions
# 2. Loading libraries
# 3. Loading sample data and reference datasets
# Then... code chunks involving data processing
# Then... code chunks calling the processed data to produce tables / plots / data summary
# Finish with Session info in Addendum section
##### The processed data is stored in "ref_datasets.list" list variable with elements holding the following data:
# 1. ref_datasets.list[[tissue]][["combined_data"]] = combined read count data (reference datasets + sample data) ("combineDatasets" function output in the "load_ref_data chunk")
# 2. ref_datasets.list[[tissue]][["sample_annot"]] = combined data samples annotation ("combineDatasets" function output in the "load_ref_data chunk")
# 3. ref_datasets.list[[tissue]][["combined_data_processed"]] = transformed, filtered and normalised data (see "data_transformation" and "data_normalisation" chunks)
# 4. ref_datasets.list[[tissue]][["pca"]] = PCA results
# 5. ref_datasets.list[[tissue]][["gene_annot_all"]] = gene annotation for combined read count data, containing all input genes. The annotation includes "SYMBOL", "GENEBIOTYPE", "ENSEMBL", "SEQNAME", "GENESEQSTART", "GENESEQEND". "ENSEMBL" is used for rownames
# 6. ref_datasets.list[[tissue]][["gene_annot"]] = gene annotation for transformed, filtered and normalised data. The annotation includes "SYMBOL", "GENEBIOTYPE", "ENSEMBL", "SEQNAME", "GENESEQSTART", "GENESEQEND". "SYMBOL" is used for rownames
# 7. ref_datasets.list[[tissue]][["expr_mut_cn_data"]] = combined expression, mutation and copy-number data
##### Define functions
##### Create 'not in' operator
"%!in%" <- function(x,table) match(x,table, nomatch = 0) == 0
##### Prepare object to write into a file
prepare2write <- function (x) {
x2write <- cbind(rownames(x), x)
colnames(x2write) <- c("Gene",colnames(x))
return(x2write)
}
##### Combine sample expression profile with reference datasets. This function outputs a vector with first element containing the merged data and second element containing merged targets info
combineDatasets <- function(sample_name, sample_counts, ref_dataset) {
##### Read file with reference datasets information
DatasetInput <- read.table(ref_dataset, sep="\t", as.is=TRUE, header=TRUE, row.names=1)
##### Extract info about target file for the first dataset
fileInfo <- strsplit(DatasetInput[,"Target_file"], split='/', fixed=TRUE)
targetFile <- read.table(DatasetInput[1,"Target_file"], sep="\t", as.is=TRUE, header=TRUE)[,c(1:4)]
rownames(targetFile) <- targetFile[,"Sample_name"]
targetFile <- cbind(targetFile[,2:4],rownames(DatasetInput[1,]))
colnames(targetFile)[ncol(targetFile)] <- "Dataset"
if ( nrow(DatasetInput) > 1 ) {
for ( i in 2:nrow(DatasetInput) ) {
##### Create a temporary object to store info from the remaining target files
targetFileTmp <- read.table(DatasetInput[i,"Target_file"], sep="\t", as.is=TRUE, header=TRUE)[,c(1:4)]
rownames(targetFileTmp) <- targetFileTmp[,"Sample_name"]
targetFileTmp <- cbind(targetFileTmp[,2:4],rownames(DatasetInput[i,]))
colnames(targetFileTmp)[ncol(targetFileTmp)] <- "Dataset"
targetFile <- rbind(targetFile, targetFileTmp)
}
}
##### Add sample info
sampleTargetFile <- data.frame(sample_counts, sample_name, NA, sample_name)
names(sampleTargetFile) <- names(targetFile)
rownames(sampleTargetFile) <- sample_name
targetFile <- rbind( targetFile, sampleTargetFile )
##### Make syntactically valid names
rownames(targetFile) <- make.names(rownames(targetFile))
##### Read sample read count file and combine it with reference datasets
datasets.comb <- read.table(sample_counts, sep="\t", as.is=TRUE, header=FALSE, row.names=NULL)
names(datasets.comb) <- c("", sample_name)
##### list genes present in the read count file
gene_list <- as.vector(datasets.comb[,1])
##### Loop through the expression data from different datasets and merge them into one matrix
for ( data_matrix in DatasetInput[ , "Expression_matrix" ] ) {
##### Add data from the reference datasets
dataset <- as.data.frame( read.table(data_matrix, header=TRUE, sep="\t", row.names=NULL) )
##### list genes present in individal files
gene_list <- c( gene_list, as.vector(dataset[,1]) )
##### Merge the expression datasets and make sure that the genes order is the same
datasets.comb <- merge( datasets.comb, dataset, by=1, all = FALSE, sort= TRUE)
##### Remove per-sample data for merged samples to free some memory
rm(dataset)
}
##### Use gene IDs as rownames
rownames(datasets.comb) <- datasets.comb[,1]
datasets.comb <- datasets.comb[, -1]
##### Make syntactically valid names
colnames(datasets.comb) <- make.names(colnames(datasets.comb))
##### Make sure that the target file contains info only about samples present in the data matrix
targetFile <- targetFile[ rownames(targetFile) %in% colnames(datasets.comb), ]
##### Make sure that the samples order in the data matrix is the same as in the target file
datasets.comb <- datasets.comb[ , rownames(targetFile) ]
##### Identify genes that were not present across all per-sampel files and were ommited in the merged matrix
gene_list <- unique(gene_list)
gene_list.missing <- gene_list[ gene_list %!in% rownames(datasets.comb) ]
##### Write list of missing genes into a file
if ( length(gene_list.missing) > 0 ) {
write.table(prepare2write(gene_list.missing), file = paste0(params$report_dir, "/", sample_name,".missing_genes.txt"), sep="\t", quote=FALSE, row.names=TRUE, append = FALSE )
}
return( list(datasets.comb, targetFile) )
}
##### Assign colours to different groups
getTargetsColours <- function(targets) {
##### Predefined selection of colours for groups
targets.colours <- c("red","cornflowerblue","green","darkred","darkgoldenrod","deepskyblue", "coral", "blue", "chartreuse4", "bisque4", "chocolate3", "cadetblue3", "darkslategrey", "lightgoldenrod4", "mediumpurple4", "orangered3","indianred1","blueviolet","darkolivegreen4","darkgoldenrod4","firebrick3","deepskyblue4", "coral3", "dodgerblue1", "chartreuse3", "bisque3", "chocolate4", "cadetblue", "darkslategray4", "lightgoldenrod3", "mediumpurple3", "orangered1")
f.targets <- factor(targets)
vec.targets <- targets.colours[1:length(levels(f.targets))]
targets.colour <- rep(0,length(f.targets))
for(i in 1:length(f.targets))
targets.colour[i] <- vec.targets[ f.targets[i]==levels(f.targets)]
return( list(vec.targets, targets.colour) )
}
##### Assign colours to different datasets
getDatasetsColours <- function(datasets) {
##### Predefined selection of colours for datasets
datasets.colours <- c("dodgerblue","firebrick","lightslategrey","darkseagreen","orange","darkcyan","bisque", "coral2", "cadetblue3","red","blue","green")
f.datasets <- factor(datasets)
vec.datasets <- datasets.colours[1:length(levels(f.datasets))]
datasets.colour <- rep(0,length(f.datasets))
for(i in 1:length(f.datasets))
datasets.colour[i] <- vec.datasets[ f.datasets[i]==levels(f.datasets)]
return( list(vec.datasets, datasets.colour) )
}
##### Perform PCA. This function outputs a list with dataframe and samples colouring info ready for plotting
pca <- function(data, targets) {
##### Keep only genes with variance > 0 across all samples
rsd <- apply(data,1,sd)
data.subset <- data[rsd>0,]
##### Perform PCA
data.subset_pca <- prcomp(t(data.subset), scale=FALSE)
##### Get variance importance for all principal components
importance_pca <- summary(data.subset_pca)$importance[2,]
importance_pca <- paste(round(100*importance_pca, 2), "%", sep="")
names(importance_pca) <- names(summary(data.subset_pca)$importance[2,])
##### Prepare data frame
data.subset_pca.df <- data.frame(targets$Target, targets$Dataset, data.subset_pca$x[,"PC1"], data.subset_pca$x[,"PC2"], data.subset_pca$x[,"PC3"])
colnames(data.subset_pca.df) <- c("Target", "Dataset", "PC1", "PC2", "PC3")
##### Assigne colours to targets and datasets
targets.colour <- getTargetsColours(target$Target)
datasets.colour <- getDatasetsColours(target$Dataset)
##### Create a list with dataframe and samples colouring info
pca.list <- list(data.subset_pca.df, importance_pca, targets.colour, datasets.colour)
names(pca.list) <- c("pca.df", "importance_pca", "targets", "datasets")
return( pca.list )
}
##### Convert a vector of numbers into corresponding vector of their percentiles
perc.rank <- function(x) trunc(rank(x))*100/length(x)
##### Perform range standardization between 0 and 1 (for the cumulative sums)
standardization <- function(x) sort(x-min(x))/(max(x)-min(x))
##### Calculate mean, sd, quantiles and cumulative franctions for expression data from specific sample group
exprGroupStats <- function(data, targets, target) {
##### Subset data for defined biological group
data.group <- data[, targets$Target %in% target ]
##### For groups with > 1 sample get the mean and standard deviation for each gene
if ( !is.null(ncol(data.group)) ) {
data.group.mean <- rowMeans(data.group)
data.group.mean <- sort(data.group.mean)
data.group.sd <- rowSds(data.group)
} else {
data.group.mean <- sort(data.group)
data.group.sd <- rep( NA, length(data.group))
}
##### Make sure the mean and sd vectors have the same gene order
names(data.group.sd) <- rownames(data)
data.group.sd <- data.group.sd[names(data.group.mean)]
##### Convert a expression values into corresponding percentiles
data.group.q <- perc.rank(data.group.mean)
##### Perform range standardization between 0 and 1 (for the cumulative sums), otherwise the negative values are summed up
data.group.s <- standardization(data.group.mean)
##### Calculate cumulative sums and perform range standardization between 0 and 1
data.group.cum <- standardization(cumsum(data.group.s))
##### Perform Z-score transformation of the mean expression values
data.group.z <- scale(data.group.mean)
##### Organise the data into data frame
data.group.df <- as.data.frame(cbind( data.group.mean, data.group.sd, data.group.z, data.group.q, data.group.cum))
names(data.group.df) <- c("mean", "sd", "z", "quantile", "cumulative_fraction")
return( data.group.df )
}
##### Generate cumulative distribution function (CDF) plot for selected gene. If option "addBoxPlot" = TRUE, then generate additional boxplot below to show the data variance for selected gene in individual groups
cdfPlot <- function(gene, data, targets, sampleName, normal, cancer, addBoxPlot = FALSE, plot_mode = "static") {
##### Get expression-related stats for each group
sample.expr.cum <- exprGroupStats(data, targets, sampleName)
cancer.expr.cum <- exprGroupStats(data, targets, cancer)
normal.expr.cum <- exprGroupStats(data, targets, normal)
##### Extract expression for selected genes
sample.expr.cum.selected <- sample.expr.cum[ rownames(sample.expr.cum) %in% gene, ]
cancer.expr.cum.selected <- cancer.expr.cum[ rownames(cancer.expr.cum) %in% gene, ]
normal.expr.cum.selected <- normal.expr.cum[ rownames(normal.expr.cum) %in% gene, ]
##### Generate box-plot for selected gene
if ( addBoxPlot ) {
data.z <- scale(data)
targets$Target[ targets$Target==sampleName ] <- "Patient"
gene.expr.df <- data.frame(targets$Target, data.z[gene, ])
colnames(gene.expr.df) <- c("Group", "Expression")
##### Reorder groups
gene.expr.df$Group <- factor(gene.expr.df$Group, levels=c(normal, cancer, "Patient"))
p2 <- plot_ly(gene.expr.df, x= ~Expression, color = ~Group, type = 'box', jitter = 0.3, pointpos = 0, boxpoints = 'all', colors = c("darkgreen", "red", "black"), opacity = 0.5, orientation = 'h', width = 800, height = 600, showlegend=FALSE)
}
##### Generate interactive CFD plot with plotly
p1 <- plot_ly(sample.expr.cum, x = ~z, color = I("black")) %>%
##### Add sample data
add_markers(y = sample.expr.cum.selected$cumulative_fraction, x = sample.expr.cum.selected$z,
text = rownames(sample.expr.cum.selected ),
name = "Patient",
marker = list(size = 12, color = "black"),
showlegend = TRUE) %>%
add_lines(y = sample.expr.cum$cumulative_fraction, x = sample.expr.cum$z,
line = list(color = "grey"),
text = rownames( sample.expr.cum ),
name = "Patient", showlegend = FALSE) %>%
##### Add cancer data
add_markers(y = cancer.expr.cum.selected$cumulative_fraction, x = cancer.expr.cum.selected$z,
text = rownames( cancer.expr.cum.selected),
name = cancer,
marker = list(size = 12, opacity = 0.5, color = "red"),
showlegend = TRUE) %>%
add_lines(y = cancer.expr.cum$cumulative_fraction, x = cancer.expr.cum$z, opacity = 0.5,
line = list(color = "red", dash = "dash"),
text = rownames( cancer.expr.cum ),
name = cancer, showlegend = FALSE) %>%
##### Add normal data
add_markers(y = normal.expr.cum.selected$cumulative_fraction, x = normal.expr.cum.selected$z,
text = rownames( normal.expr.cum.selected ),
name = normal,
marker = list(size = 12, opacity = 0.5, color = "green"),
showlegend = TRUE) %>%
add_lines(y = normal.expr.cum$cumulative_fraction, x = normal.expr.cum$z, opacity = 0.5,
line = list(color = "green", dash = "dash"),
text = rownames( normal.expr.cum ),
name = normal, showlegend = FALSE) %>%
##### Add quantile lines
add_lines(y = seq(0,1,0.1), x = rep(quantile(sample.expr.cum$z)[2], 11), opacity = 0.5,
line = list(color = "gray", dash = "dash"),
name = "Q1", showlegend = FALSE) %>%
add_lines(y = seq(0,1,0.1), x = rep(quantile(sample.expr.cum$z)[3], 11), opacity = 0.5,
line = list(color = "gray", dash = "dash"),
name = "Q2", showlegend = FALSE) %>%
add_lines(y = seq(0,1,0.1), x = rep(quantile(sample.expr.cum$z)[4], 11), opacity = 0.5,
line = list(color = "gray", dash = "dash"),
name = "Q3", showlegend = FALSE) %>%
layout(title = gene, xaxis = list(title = "mRNA expression (Z-score)", zeroline = FALSE, range = c(min(sample.expr.cum$z)-1.5, max(sample.expr.cum$z)+1.5)),
yaxis = list(title = "Cumulative fraction"),
legend = list(orientation = 'v', x = 0.02, y = 1, bgcolor = "white")
##### Annotate the gene of interest
#annotations = list(x = sample.expr.cum.selected$z, y = sample.expr.cum.selected$cumulative_fraction,
#text = rownames(sample.expr.cum.selected), xref = "x", yref = "y", showarrow = TRUE, arrowhead = 0, opacity = 0.5, ax = 30, ay = 30)
)
##### Combine CDF plot with boxplot if this option is selected
if ( addBoxPlot ) {
p1_2 <- subplot(p1, p2, nrows = 2, shareX = TRUE, shareY = FALSE, titleY = TRUE) %>%
layout(xaxis = list(title = "mRNA expression (Z-score)", zeroline = FALSE, range = c(min(sample.expr.cum$z)-1.5, max(sample.expr.cum$z)+1.5)),
yaxis = list(title = "Cumulative fraction"),
legend = list(orientation = 'v', x = 0.02, y = 1, bgcolor = "white"),
yaxis2 = list( title =""), xaxis2 = list(title = paste0(gene, " mRNA expression (Z-score)")), margin = list(l=50, r=50, b=50, t=50, pad=4), autosize = FALSE,
showlegend=TRUE, showlegend2=FALSE)
##### Embed static rather interactive plots into the html report if requested by user. This will reduce the report size. TO this end the orca() function in plotly is used. Of note, this requires orca (https://github.com/plotly/orca) installation (conda option worked well for me, https://github.com/plotly/orca#method-1-conda), but the orca needs to be in the PATH, see https://github.com/plotly/orca/pull/122).
if ( plot_mode == "static" ) {
##### Create directory for CDF plots
cdfPlotsDir <- paste(params$report_dir, "CDF_plots", sep = "/")
if ( !file.exists(cdfPlotsDir) ) {
dir.create(cdfPlotsDir, recursive=TRUE)
}
##### Add access token, required by orca function, to the shell environment
Sys.setenv('MAPBOX_TOKEN' = 'secret token')
##### Change directory to folder with CDF plots
setwd(cdfPlotsDir)
##### Save the static image into a file
orca(p1_2, format = "png", file = paste0(gene, "_cdf_plot.png"), width = 800, height = 600)
##### Present the static plot in the report
include_graphics(paste(cdfPlotsDir, paste0(gene, "_cdf_plot.png"), sep = "/"), dpi = 72)
} else if ( plot_mode == "interactive" ) {
return( p1_2 )
}
} else {
if ( plot_mode == "static") {
##### Create directory for CDF plots
cdfPlotsDir <- paste(params$report_dir, "CDF_plots", sep = "/")
if ( !file.exists(cdfPlotsDir) ) {
dir.create(cdfPlotsDir, recursive=TRUE)
}
##### Add access token, required by orca function, to the shell environment
Sys.setenv('MAPBOX_TOKEN' = 'secret token')
##### Change directory to folder with CDF plots
setwd(cdfPlotsDir)
##### Save the static image into a file
orca(p1, format = "png", file = paste0(gene, "_cdf_plot.png"), width = 800, height = 200)
##### Present the static plot in the report
include_graphics(paste(cdfPlotsDir, paste0(gene, "_cdf_plot.png"), sep = "/"), dpi = 72)
} else if ( plot_mode == "interactive" ) {
return( p1 )
}
}
}
##### Generate scatterplot with per-gene expression values (y-axis), CN values (x-axis) and mutation status info (colours), if provided
mutCNexprPlot <- function(data, mut_data = FALSE, cn_bottom = 1.5, cn_top = 3, plot_mode = "static") {
##### Generate scatterplot with per-gene expression values (y-axis), CN values (x-axis) and mutation status info (colours)
if ( mut_data ) {
p <- plot_ly(data, x = ~CN, y = ~mRNA, color = ~Mutation, text=~Gene, type='scatter', mode = "markers", marker = list(size=10, symbol="circle"), width = 800, height = 600) %>%
add_annotations( text="Variant consequence", xref="paper", yref="paper",
x=1.02, xanchor="left",
y=0.85, yanchor="top",
legendtitle=TRUE, showarrow=FALSE ) %>%
add_annotations( data = data[ data$CN > cn_top | data$CN < cn_bottom ,], text=~Gene,
x=~CN, xanchor="left",
y=~mRNA, yanchor="top",
font = list(color = "Grey",
size = 10),
legendtitle=TRUE, showarrow=FALSE ) %>%
layout( xaxis = list(title = "Copy-number values"), yaxis = list(title = "mRNA expression (Z-score)"), margin = list(l=50, r=50, b=50, t=50, pad=4), autosize = F, legend = list( orientation = 'v', y=0.8, yanchor="top"), showlegend=TRUE)
##### Generate scatterplot with per-gene expression values (y-axis) and CN values (x-axis)
} else {
p <- plot_ly(data, x = ~CN, y = ~mRNA, text=~Gene, type='scatter', mode = "markers", marker = list(size=10, symbol="circle"), width = 800, height = 600) %>%
add_annotations( data = data[ data$CN > cn_top | data$CN < cn_bottom ,], text=~Gene,
x=~CN, xanchor="left",
y=~mRNA, yanchor="top",
font = list(color = "Grey",
size = 10),
legendtitle=TRUE, showarrow=FALSE ) %>%
layout( xaxis = list(title = "Copy-number values"), yaxis = list(title = "mRNA expression (Z-score)"), margin = list(l=50, r=50, b=50, t=50, pad=4), autosize = F, legend = list( orientation = 'v', y=0.8, yanchor="top"), showlegend=FALSE)
}
##### Embed static rather interactive plots into the html report if requested by user. This will reduce the report size. TO this end the orca() function in plotly is used. Of note, this requires orca (https://github.com/plotly/orca) installation (conda option worked well for me, https://github.com/plotly/orca#method-1-conda), but the orca needs to be in the PATH, see https://github.com/plotly/orca/pull/122).
if ( plot_mode == "static" ) {
##### Create directory for the plots
mutCNexprPlotDir <- paste(params$report_dir, "mut_cn_expr_plot", sep = "/")
if ( !file.exists(mutCNexprPlotDir) ) {
dir.create(mutCNexprPlotDir, recursive=TRUE)
}
##### Add access token, required by orca function, to the shell environment
Sys.setenv('MAPBOX_TOKEN' = 'secret token')
##### Change directory to folder with CDF plots
setwd(mutCNexprPlotDir)
##### Save the static image into a file
orca(p, format = "png", file = "mut_cn_expr_plot.png", width = 800, height = 600)
##### Present the static plot in the report
include_graphics(paste(mutCNexprPlotDir, "mut_cn_expr_plot.png", sep = "/"), dpi = 72)
} else if ( plot_mode == "interactive" ) {
return( p )
}
}
##### Fusion visualisation
fusion_png <- function(geneA, geneB, clinker_results ) {
##### Get path to fusion visualisation pdf file
fusion_pdf <- paste(clinker_results, paste0(geneA, "_", geneB, ".pdf"), sep = "/")
##### Export pdf to png
fusion_png <- paste(clinker_results, paste0(geneA, "_", geneB, ".png"), sep = "/")
fusion <- image_read_pdf(fusion_pdf, pages = NULL, density = 300)
image_write(fusion, path = fusion_png, format = "png")
##### Present the converted file in the report
include_graphics(fusion_png)
}
##### Generate table with coloured cells indicating expression values for selected genes
exprTable <- function(genes, data, cn_data = NULL, cn_decrease = TRUE, targets, sampleName, normal, cancer, genes_annot = NULL, OncoKB_genes = NULL, mut_annot = NULL, fusion_genes = NULL, ext_links = FALSE, type = "z") {
##### Check which of the selected genes are not present in the expression data
genes.absent <- genes[ genes %!in% rownames(data) ]
targets.list <- unique(targets$Target)
##### Initiate dataframe for expression mean values in each group
group.z <- as.data.frame(matrix(NA, ncol = length(targets.list), nrow = nrow(data)))
colnames(group.z) <- targets.list
rownames(group.z) <- sort(rownames(data))
for ( group in targets.list ) {
##### Calculate z-score for each group
group.stats <- exprGroupStats(data[rownames(group.z), ], targets, group)
group.stats <- group.stats[order(rownames(group.stats)), ]
#### Present expression data as percentiles or z-score values (default)
if ( type == "perc" ) {
group.z[, group] <- round(group.stats$quantile, digits=1)
} else {
group.z[, group] <- round(group.stats$z, digits=2)
}
}
##### Compute Z-scores sd for each gene across groups
group.z <- cbind(group.z, round(rowSds(as.matrix(group.z)), digits = 2))
names(group.z)[ncol(group.z)] <- "SD"
##### Calculate Z-score differneces between investigated sample and normal group mean values
group.z <- cbind(group.z, round((group.z[, sampleName] - group.z[, normal]), digits = 2))
names(group.z)[ncol(group.z)] <- "Patient vs Normal"
##### Calculate Z-score differneces between investigated sample and cancer group mean values
group.z <- cbind(group.z, round((group.z[, sampleName] - group.z[, cancer]), digits = 2))
names(group.z)[ncol(group.z)] <- paste0("Patient vs ", cancer)
##### Add NAs for genes that are absent in the expression matrix. In the "Patient vs Normal" and "Patient vs PDAC" columns provide "0"s to facilitate interactive sorting the table. These will appear in blank cells in the table
if ( length(genes.absent) > 0 ) {
NAs.df <- data.frame(matrix(NA, ncol = ncol(group.z), nrow = length(genes.absent)))
names(NAs.df) <- names(group.z)
rownames(NAs.df) <- genes.absent
NAs.df[ names(NAs.df) %in% c("Patient vs Normal", "Patient vs PDAC")] <- 0
group.z <- rbind( group.z, NAs.df)
}
##### Change sample ID to "Patient" and normal sample to "Normal" for better visualisation
names(group.z)[names(group.z)==sampleName] <- "Patient"
targets.list[targets.list==sampleName] <- "Patient"
names(group.z)[names(group.z)==normal] <- "Normal"
targets.list[targets.list==normal] <- "Normal"
##### Reorder groups
group.z <- cbind(group.z[ , c("Normal", cancer, "Patient")], group.z[, c("SD", "Patient vs Normal", "Patient vs PDAC" )])
##### Add genes annotation
if ( !is.null(genes_annot) ) {
##### Remove rows with duplicated gene symbols
if ( "SYMBOL" %in% names(genes_annot) ) {
genes_annot <- genes_annot[!duplicated(genes_annot$SYMBOL),]
rownames(genes_annot) <- genes_annot$SYMBOL
genes_annot <- genes_annot[ , -c(match("SYMBOL", names(genes_annot))), drop=FALSE]
}
##### Merge the dataframe with groups mean expression values and gene annotations
group.z <- merge(genes_annot, group.z, by="row.names", all = TRUE, sort = FALSE)
rownames(group.z) <- group.z$Row.names
names(group.z) <- gsub("Row.names", "Gene", names(group.z))
} else {
group.z <- cbind(rownames(group.z), group.z)
}
##### Define colours for cells background for each group and the patient vs normal difference
##### Initiate dataframe for expression mean values in each group
brks.q <- as.data.frame( matrix(NA, ncol = length(targets.list), nrow = length(seq(.05, .95, .0005)) ))
colnames(brks.q) <- targets.list
clrs.q <- as.data.frame( matrix(NA, ncol = length(targets.list), nrow = length(seq(.05, .95, .0005))+1 ))
colnames(clrs.q) <- targets.list
for ( group in c(targets.list, "Patient vs Normal", paste0("Patient vs ", cancer)) ) {
brks.q[[group]] <- quantile(group.z[, group], probs = seq(.05, .95, .0005), na.rm = TRUE)
#brks.q[[group]] <- sort(group.z[, group])
clrs_pos.q <- round(seq(255, 150, length.out = length(brks.q[[group]])/2 + 1.5), 0) %>%
{paste0("rgb(255,", ., ",", ., ")")}
clrs_neg.q <- rev(round(seq(255, 150, length.out = length(brks.q[[group]])/2 - 0.5), 0)) %>%
{paste0("rgb(", .,",", .,",", "255)")}
clrs.q[[group]] <- c(clrs_neg.q, clrs_pos.q)
}
##### Subset the expression data to include only the user-defined genes
group.z <- group.z[ rownames(group.z) %in% genes, ]
names(group.z)[1] <- "Gene"
#### Add variants information to the expression table - if exists. Note, "TIER" and "CONSEQUENCE" columns are required
if( !is.null(mut_annot) && "TIER" %in% colnames(mut_annot) && length(genes) > 0 ) {
mut_annot <- mut_annot[mut_annot$SYMBOL %in% genes,]
#### keep only varaints that has the lowest tier value. Multiple varaints detected in same gene but with higher tier will be added to additional column "CONSEQUENCE_OTHER". Applies to the ones that may have multiple mutations and hence tiers
##### First, create a list of genes to store multiple variants
mut_consequence <- vector("list", length(unique(mut_annot$SYMBOL)))
mut_consequence <- setNames(mut_consequence, unique(mut_annot$SYMBOL) )
##### Record all varaints detected in individual genes
if ( nrow(mut_annot) > 0 ) {
for ( i in 1:nrow(mut_annot) ) {
mut_consequence[[ mut_annot$SYMBOL[i] ]] <- unique(c( mut_consequence[[ mut_annot$SYMBOL[i] ]], mut_annot$CONSEQUENCE[i] ))
}
mut_annot$CONSEQUENCE_OTHER <- "-"
}
##### Remove the first elements since these variant consequences will be reported as the "canonical" CONSEQUENCE
mut_consequence <- lapply(mut_consequence, function(x) x[-1])
##### Order variant entires based on tier info, to make sure that the varaints with the lowest tier are reported first
mut_annot <- mut_annot[ order(mut_annot$TIER), ]
##### Remove rows with duplicated gene symbols
mut_annot <- mut_annot[!duplicated(mut_annot$SYMBOL),]
rownames(mut_annot) <- mut_annot$SYMBOL
##### Add other provided variants consequences for individual genes
for ( gene in rownames(mut_annot) ) {
if ( length(mut_consequence[[ gene ]]) > 0 ) {
mut_annot$CONSEQUENCE_OTHER[ match(gene, mut_annot$SYMBOL) ] <- mut_consequence[[ gene ]]
}
}
#### merge the variants information with the dataframe
mut_annot <- mut_annot[ , -c(match("SYMBOL", names(mut_annot))), drop=FALSE]
group.z <- merge(group.z, mut_annot, by = "row.names", all = TRUE, sort = FALSE)
rownames(group.z) <- group.z$Row.names
group.z <- group.z[ , -c(match("Row.names", names(group.z))), drop=FALSE]
}
##### Add CN data if provided
if ( !is.null(cn_data) && length(genes) > 0 ) {
##### Get the position of "Patient vs Cancer" column
col_idx <- grep(paste0("Patient vs ", cancer), names(group.z))
##### Now place the CN data after the "Patient vs Cancer" column
group.z <- add_column(group.z, round(cn_data[ rownames(group.z), "CN"], digits=2), .after = col_idx)
colnames(group.z)[ col_idx+1 ] <- "Patient (CN)"
}
##### Add fusion genes annotation
if ( !is.null(fusion_genes) && length(genes) > 0 ) {
group.z <- merge(group.z, fusion_genes, by="row.names", all = TRUE, sort = FALSE)
rownames(group.z) <- group.z$Row.names
group.z <- group.z[ , -c(match("Row.names", names(group.z))), drop=FALSE]
}
##### Add cancer gene resources info
if ( !is.null(OncoKB_genes) && length(genes) > 0 ) {
group.z <- merge(group.z, OncoKB_genes, by="row.names", all = TRUE, sort = FALSE)
rownames(group.z) <- group.z$Row.names
group.z <- group.z[ , -c(match("Row.names", names(group.z))), drop=FALSE]
}
##### Include only queried genes
group.z <- group.z[ genes, ]
##### Add links to external gene annotation resourses
if ( ext_links && length(genes) > 0 ) {
##### Place the external links after the "Patient vs Cancer" column
##### Get the position of "Patient vs Cancer" column
col_idx <- grep(paste0("Patient vs ", cancer), names(group.z))
group.z <- add_column(group.z, NA, .after = col_idx)
names(group.z)[ col_idx+1 ] <- "ext_links"
for ( gene in genes ) {
##### Provide link to VICC meta-knowledgebase ( https://search.cancervariants.org )
group.z$ext_links[ group.z$Gene==gene ] <- paste0("<a href='https://search.cancervariants.org/#", gene, "' target='_blank'>VICC</a>")
##### Provide link to OncoKB
if ( gene %in% rownames(ref_genes.list[["oncokb_genes"]]) & ref_genes.list[["oncokb_genes"]][gene, "OncoKB"] == "Yes" ) {
group.z$ext_links[ group.z$Gene == gene ] <- paste( group.z$ext_links[ group.z$Gene==gene ] , paste0("<a href='http://oncokb.org/#/gene/", gene, "' target='_blank'>OncoKB</a>"), sep = ", ")
}
##### Provide link to CIViC database druggable genes ( https://civicdb.org )
if ( gene %in% caner_genes_annot.list[["civic_clin_evid"]]$gene ) {
group.z$ext_links[ group.z$Gene==gene ] <- paste( group.z$ext_links[ group.z$Gene==gene ] , paste0("<a href='", unique(caner_genes_annot.list[["civic_clin_evid"]][ caner_genes_annot.list[["civic_clin_evid"]]$gene == gene , "gene_civic_url"]), "' target='_blank'>CIViC</a>"), sep = ", ")
}
}
names(group.z) <- gsub("ext_links", "External resources", names(group.z))
}
##### Attach links to GeneCards and Ensembl (if provided). Here we assume that gene names are
for ( gene in genes ) {
if ( "ENSEMBL" %in% names(group.z) ) {
if ( !is.na(group.z$ENSEMBL[ group.z$Gene==gene ]) ) {
group.z$ENSEMBL[ group.z$Gene==gene ] <- paste0("<a href='http://ensembl.org/Homo_sapiens/Gene/Summary?db=core;g=", group.z$ENSEMBL[ group.z$Gene==gene], "' target='_blank'>", group.z$ENSEMBL[ group.z$Gene == gene ], "</a>")
}
}
group.z$Gene[ group.z$Gene==gene ] <- paste0("<a href='https://www.genecards.org/cgi-bin/carddisp.pl?gene=", gene, "' target='_blank'>", gene, "</a>")
}
##### Order the data by CN values (to allow filtering based on CN information) and then by the highest absolute values for Patient vs Normal difference (to allow filtering based on z-score differences)
if ( !is.null(cn_data) && length(genes) > 0 ) {
group.z <- group.z[ order(abs(group.z[, "Patient vs Normal"]), decreasing = cn_decrease), ]
group.z <- group.z[ order(group.z[ ,col_idx+1 ], decreasing = cn_decrease), ]
##### Order the data by increasing TIER category (to allow filtering based on tier information) and then by the highest absolute values for Patient vs Normal difference (to allow filtering based on z-score differences)
} else if ( !is.null(mut_annot) && length(genes) > 0 ) {
group.z <- group.z[ order(abs(group.z[, "Patient vs Normal"]), decreasing = TRUE), ]
group.z <- group.z[ order(group.z$TIER), ]
##### Otherwise order table by the highest absolute values for Patient vs Normal difference
} else if ( length(genes) > 0 ) {
group.z <- group.z[ order(abs(group.z[, "Patient vs Normal"]), decreasing = TRUE), ]
}
if ( !is.null(cn_data) ) {
##### Generate a table with genes annotations and coloured expression values in each group
dt.table <- DT::datatable( data = group.z, filter="none", rownames = FALSE, extensions = c('Buttons','Scroller'), options = list(pageLength = 10, dom = 'Bfrtip', buttons = c('excel', 'csv', 'pdf','copy','colvis'), scrollX = TRUE, deferRender = TRUE, scrollY = 200, scroller = TRUE), width = 800, caption = htmltools::tags$caption( style = 'caption-side: top; text-align: left; color:grey; font-size:100% ;'), escape = FALSE) %>%
DT::formatStyle( columns = names(group.z), `font-size` = '12px', 'text-align' = 'center' ) %>%
##### Colour cells according to the expression values quantiles in each group
DT::formatStyle(columns = targets.list[1],
backgroundColor = DT::styleInterval(brks.q[[targets.list[1]]], clrs.q[[targets.list[1]]])) %>%
DT::formatStyle(columns = targets.list[2],
backgroundColor = DT::styleInterval(brks.q[[targets.list[2]]], clrs.q[[targets.list[2]]])) %>%
DT::formatStyle(columns = targets.list[3],
backgroundColor = DT::styleInterval(brks.q[[targets.list[3]]], clrs.q[[targets.list[3]]])) %>%
DT::formatStyle(columns = "Patient vs Normal",
backgroundColor = DT::styleInterval(brks.q[["Patient vs Normal"]], clrs.q[["Patient vs Normal"]])) %>%
DT::formatStyle(columns = paste0("Patient vs ", cancer),
backgroundColor = DT::styleInterval(brks.q[[paste0("Patient vs ", cancer)]], clrs.q[[paste0("Patient vs ", cancer)]])) %>%
DT::formatStyle(columns = "Patient (CN)", background = DT::styleColorBar(base::range(group.z[ ,"Patient (CN)" ], na.rm = TRUE), 'lightblue'), backgroundSize = '98% 88%', backgroundRepeat = 'no-repeat', backgroundPosition = 'center')
} else {
##### Generate a table with genes annotations and coloured expression values in each group
dt.table <- DT::datatable( data = group.z, filter="none", rownames = FALSE, extensions = c('Buttons','Scroller'), options = list(pageLength = 10, dom = 'Bfrtip', buttons = c('excel', 'csv', 'pdf','copy','colvis'), scrollX = TRUE, deferRender = TRUE, scrollY = 200, scroller = TRUE), width = 800, caption = htmltools::tags$caption( style = 'caption-side: top; text-align: left; color:grey; font-size:100% ;'), escape = FALSE) %>%
DT::formatStyle( columns = names(group.z), `font-size` = '12px', 'text-align' = 'center' ) %>%
##### Colour cells according to the expression values quantiles in each group
DT::formatStyle(columns = targets.list[1],
backgroundColor = DT::styleInterval(brks.q[[targets.list[1]]], clrs.q[[targets.list[1]]])) %>%
DT::formatStyle(columns = targets.list[2],
backgroundColor = DT::styleInterval(brks.q[[targets.list[2]]], clrs.q[[targets.list[2]]])) %>%
DT::formatStyle(columns = targets.list[3],
backgroundColor = DT::styleInterval(brks.q[[targets.list[3]]], clrs.q[[targets.list[3]]])) %>%
DT::formatStyle(columns = "Patient vs Normal",
backgroundColor = DT::styleInterval(brks.q[["Patient vs Normal"]], clrs.q[["Patient vs Normal"]])) %>%
DT::formatStyle(columns = paste0("Patient vs ", cancer),
backgroundColor = DT::styleInterval(brks.q[[paste0("Patient vs ", cancer)]], clrs.q[[paste0("Patient vs ", cancer)]]))
}
group.z$SYMBOL <- rownames(group.z)
return( list(dt.table, group.z) )
}
##### Generate table with drugs targeting selected set of genes using info from CIViC database (https://civicdb.org/)
civicDrugTable <- function(genes, civic_var_summaries, civic_clin_evid, evid_type = "Predictive", var_type = "mutation") {
##### Initialize data frame to the about drug-target info from CIViC
drug.info <- setNames(data.frame(matrix(ncol = 18, nrow = 0)), c("Gene", "Variant", "variant_types", "drugs", "nct_ids", "evidence_level", "evidence_type", "evidence_direction", "clinical_significance", "rating", "civic_actionability_score", "Disease", "phenotypes", "pubmed_id", "variant_origin", "representative_transcript", "representative_transcript2", "last_review_date"))
evid_levels <- list("A" = "A: Validated association", "B" = "B: Clinical evidence", "C" = "C: Case study", "D" = "D: Preclinical evidence", "E" = "E: Inferential association")
##### Loop thourgh each gene and check if they are druggable
for ( gene in genes) {
##### Get summary info about druggable genes
if ( gene %in% civic_clin_evid$gene ) {
##### Extract info about all reported variants's clinical evidence for queried gene
clin.evid.info <- civic_clin_evid[ civic_clin_evid$gene == gene , ]
##### Use more descriptive evidence level info
for ( level in unique(clin.evid.info$evidence_level) ) {
clin.evid.info$evidence_level[ clin.evid.info$evidence_level == level ] <- evid_levels[[ level ]]
}
##### Subset table to include only variants with the evidence type of interest
clin.evid.info <- clin.evid.info[ clin.evid.info$evidence_type == evid_type, ]
if ( nrow(clin.evid.info) > 0 ) {
##### Provide link to CIViC clinical evidence summary
clin.evid.info$evidence_type <- paste0("<a href='", clin.evid.info$evidence_civic_url, "' target='_blank'>", clin.evid.info$evidence_type, "</a>")
##### Provide link to CIViC gene summary
clin.evid.info$gene_civic_url <- paste0("<a href='", clin.evid.info$gene_civic_url, "' target='_blank'>", gene, "</a>")
names(clin.evid.info)[ names(clin.evid.info) =="gene_civic_url" ] <- "Gene"
##### Provide link to CIViC variants summary
clin.evid.info$variant_civic_url <- paste0("<a href='", clin.evid.info$variant_civic_url, "' target='_blank'>", clin.evid.info$variant, "</a>")
names(clin.evid.info)[ names(clin.evid.info) =="variant_civic_url" ] <- "Variant"
##### Provide link to ClinicalTrials.gov variants summary based on NCT IDs
for ( nct_id in clin.evid.info$nct_ids ) {
if ( !is.empty(nct_id) ) {
##### Deal with multiple NCT IDs (separated by comma)
nct_id_url <- gsub(" '" , "'", paste(gsub("/ " , "/", paste("<a href='https://clinicaltrials.gov/ct2/show/", unlist(strsplit(nct_id, split=",", fixed=TRUE)) , "' target='_blank'>", unlist(strsplit(nct_id, split=",", fixed=TRUE)), "</a>")), collapse = ", "))
clin.evid.info$nct_ids[ clin.evid.info$nct_ids==nct_id ] <- nct_id_url
}
}
##### Provide link to PubMed variants summary
clin.evid.info$pubmed_id <- paste0("<a href='https://www.ncbi.nlm.nih.gov/pubmed/", clin.evid.info$pubmed_id, "' target='_blank'>", clin.evid.info$pubmed_id, "</a>")
##### Provide link to Disease Ontology
clin.evid.info$doid <- paste0("<a href='http://www.disease-ontology.org/?id=DOID:", clin.evid.info$doid, "' target='_blank'>", clin.evid.info$disease, "</a>")
names(clin.evid.info)[ names(clin.evid.info) =="doid" ] <- "Disease"
##### Extract info about all variants it that gene
var.info <- civic_var_summaries[ civic_var_summaries$gene == gene , ]
var.info <- var.info[, c("variant", "variant_types", "civic_actionability_score")]
var.info[,"variant_types"] <- gsub("_", " ", var.info[,"variant_types"])
var.info[,"variant_types"] <- gsub(",", ", ", var.info[,"variant_types"])
##### Merge about all variants it that gene and clinical evidence info
clin.evid.info <- merge(clin.evid.info, var.info, by = "variant", all.x = TRUE)
##### Filter drug matching info depending on the variant type
var_type.keep <- NULL
if ( var_type == "mutation" ) {
##### Remove entries containing "EXPRESSION", "AMPLIFICATION", "DELETION", "METHYLATION", "WILD TYPE", "FUSION", "COPY", "REARRANGEMENT", "PHOSPHORYLATION", "TRANSCRIPT", "GAIN", "LOSS"
var_type.keep <- c(var_type.keep, grep( "EXPRESSION", clin.evid.info$variant, invert=TRUE, ignore.case=TRUE))
var_type.keep <- c(var_type.keep, grep( "AMPLIFICATION", clin.evid.info$variant, invert=TRUE, ignore.case=TRUE))
var_type.keep <- c(var_type.keep, grep( "DELETION", clin.evid.info$variant, invert=TRUE, ignore.case=TRUE))
var_type.keep <- c(var_type.keep, grep( "METHYLATION", clin.evid.info$variant, invert=TRUE, ignore.case=TRUE))
var_type.keep <- c(var_type.keep, grep( "WILD TYPE", clin.evid.info$variant, invert=TRUE, ignore.case=TRUE))
var_type.keep <- c(var_type.keep, grep( "FUSION", clin.evid.info$variant, invert=TRUE, ignore.case=TRUE))
var_type.keep <- c(var_type.keep, grep( "REARRANGEMENT", clin.evid.info$variant, invert=TRUE, ignore.case=TRUE))
var_type.keep <- c(var_type.keep, grep( "PHOSPHORYLATION", clin.evid.info$variant, invert=TRUE, ignore.case=TRUE))
var_type.keep <- c(var_type.keep, grep( "COPY", clin.evid.info$variant, invert=TRUE, ignore.case=TRUE))
var_type.keep <- c(var_type.keep, grep( "TRANSCRIPT", clin.evid.info$variant, invert=TRUE, ignore.case=TRUE))
var_type.keep <- c(var_type.keep, grep( "GAIN", clin.evid.info$variant, invert=TRUE, ignore.case=TRUE))
var_type.keep <- c(var_type.keep, grep( "LOSS", clin.evid.info$variant, invert=TRUE, ignore.case=TRUE))
clin.evid.info <- clin.evid.info[ -c(unique(var_type.keep)), ]
} else if ( var_type == "expression" ) {
##### Keep only entries containing "EXPRESSION", "FUSION", "TRANSCRIPT", "ALTERATION"
var_type.keep <- c(var_type.keep, grep( "EXPRESSION", clin.evid.info$variant, invert=FALSE, ignore.case=TRUE))
var_type.keep <- c(var_type.keep, grep( "FUSION", clin.evid.info$variant, invert=FALSE, ignore.case=TRUE))
var_type.keep <- c(var_type.keep, grep( "TRANSCRIPT", clin.evid.info$variant, invert=FALSE, ignore.case=TRUE))
var_type.keep <- c(var_type.keep, grep( "ALTERATION", clin.evid.info$variant, invert=FALSE, ignore.case=TRUE))
clin.evid.info <- clin.evid.info[ c(unique(var_type.keep)), ]
} else if ( var_type == "fusion" ) {
##### Keep only entries containing "FUSION", "ALTERATION", "[gene]-", "-[gene]"
##### Keep only entries containing "FUSION", "ALTERATION", "-"
var_type.keep <- c(var_type.keep, grep( "FUSION", clin.evid.info$variant, invert=FALSE, ignore.case=TRUE))
var_type.keep <- c(var_type.keep, grep( paste0(gene, "-"), clin.evid.info$variant, invert=FALSE, ignore.case=TRUE))
var_type.keep <- c(var_type.keep, grep( paste0("-", gene), clin.evid.info$variant, invert=FALSE, ignore.case=TRUE))
var_type.keep <- c(var_type.keep, grep( "ALTERATION", clin.evid.info$variant, invert=FALSE, ignore.case=TRUE))
clin.evid.info <- clin.evid.info[ c(unique(var_type.keep)), ]
} else if ( var_type == "copy_gain" ) {
##### Keep only entries containing "AMPLIFICATION", "COPY", "GAIN", "ALTERATION"
var_type.keep <- c(var_type.keep, grep( "AMPLIFICATION", clin.evid.info$variant, invert=FALSE, ignore.case=TRUE))
var_type.keep <- c(var_type.keep, grep( "COPY", clin.evid.info$variant, invert=FALSE, ignore.case=TRUE))
var_type.keep <- c(var_type.keep, grep( "GAIN", clin.evid.info$variant, invert=FALSE, ignore.case=TRUE))
var_type.keep <- c(var_type.keep, grep( "ALTERATION", clin.evid.info$variant, invert=FALSE, ignore.case=TRUE))
clin.evid.info <- clin.evid.info[ c(unique(var_type.keep)), ]
} else if ( var_type == "copy_loss" ) {
##### Keep only entries containing "DELETION", "COPY", "LOSS", "ALTERATION"
var_type.keep <- c(var_type.keep, grep( "DELETION", clin.evid.info$variant, invert=FALSE, ignore.case=TRUE))
var_type.keep <- c(var_type.keep, grep( "COPY", clin.evid.info$variant, invert=FALSE, ignore.case=TRUE))
var_type.keep <- c(var_type.keep, grep( "LOSS", clin.evid.info$variant, invert=FALSE, ignore.case=TRUE))
var_type.keep <- c(var_type.keep, grep( "ALTERATION", clin.evid.info$variant, invert=FALSE, ignore.case=TRUE))
clin.evid.info <- clin.evid.info[ c(unique(var_type.keep)), ]
}
}
if ( nrow(clin.evid.info) > 0 ) {
##### Subset table to include only most important info
clin.evid.info <- clin.evid.info[ , names(drug.info)]
##### Add drugs info for subsequent gene
drug.info <- rbind(drug.info, clin.evid.info)
}
}
}
##### Use more friendly column names for the table
names(drug.info) <- c("Gene", "Variant", "Variant type", "Drugs", "Clinical trials", "Evidence level", "Evidence type", "Evidence direction", "Clinical significance", "Trust rating", "Actionability score", "Disease", "Phenotypes", "PubMed ID", "Variant origin", "Representative transcript", "Representative transcript 2", "Review date")
##### Generate a table
dt.table <- DT::datatable( data = drug.info, filter = "none", rownames = FALSE, extensions = c('Buttons','Scroller'), options = list(pageLength = 10, dom = 'Bfrtip', buttons = c('excel', 'csv', 'pdf','copy','colvis'), scrollX = TRUE, deferRender = TRUE, scrollY = 200, scroller = TRUE), width = 800, caption = htmltools::tags$caption(style = 'caption-side: top; text-align: left; color:grey; font-size:100% ;'), escape = FALSE) %>%
DT::formatStyle( columns = names(drug.info), `font-size` = '12px', 'text-align' = 'center' ) %>%
##### Colour cells according to evidence level and trust rating
DT::formatStyle(columns = "Evidence level",
backgroundColor = DT::styleEqual(c("A: Validated association", "B: Clinical evidence", "C: Case study", "D: Preclinical evidence", "E: Inferential association"), c("mediumseagreen", "deepskyblue", "mediumpurple", "darkorange", "coral")) ) %>%
#DT::formatStyle(columns = "Trust rating",
# backgroundColor = DT::styleEqual(c(1:5), c(rev(round(seq(0, 200, length.out = 5), 0) %>% {paste0("rgb(", .,",200,", ., ")")}))) )
DT::formatStyle(columns = "Trust rating",
backgroundColor = DT::styleEqual(c(1:5), c("coral", "azure", "lightskyblue", "palegreen", "mediumseagreen")) )
return( list(dt.table, drug.info) )
}
##### Generate bezier curves-like plot representing gene fusion events
fusionsBezierPlot <- function(fusion_annot, genes_annot) {
##### Get the genes chromosomes...
chr1 <- paste0("chr", fusion_annot$SEQNAME)
chr2 <- paste0("chr", fusion_annot$SEQNAME.1)
##### ...positions
pos1 <- fusion_annot$GENESEQSTART
pos2 <- fusion_annot$GENESEQSTART.1
##### ... and the events type
type <- paste(fusion_annot$pizzly.fusions.annot.fusions_abundant, fusion_annot$pizzly.fusions.annot.fusions_cancer, sep = "-")
type <- gsub("Yes-Yes","Abundant and cancer gene(s)", type)
type <- gsub("Yes--","Abundant transcript(s)", type)
type <- gsub("--Yes","Cancer gene(s)", type)
type <- gsub("---","Other", type)
#### Prepare x-axis coordinates info for ggplot
##### This part of the script converts the genomic positions from hg38 to coordinates that can be plotted on the ggplot x-axis.
##### Start with calculating the whole genome length. Here we consider chromosomes 1-22, X and Y
genome.length <- sum(seqlengths(Hsapiens)[1:24])
##### Now calculate fake chromosomes' start positions so that they match with the x-axis coordinates in the ggplot
chrs_fake_starts <- vector("list", 24)
chrs_fake_starts <- setNames(chrs_fake_starts, names(Hsapiens)[1:24] )
##### Chromosome 1 has coordingate 0
chrs_fake_starts[["chr1"]] <- 0
##### The coordinates for the remaining chromosomes will be calculated by adding the lengths of individual preceding chromosomes
length_sum <- 0
for ( i in 2:length(chrs_fake_starts) ) {
#cat(paste("\nThe fake start position for " , names(chrs_fake_starts)[i], " is ", length_sum + as.numeric(seqlengths(Hsapiens)[[i-1]]), sep=""))
# cat(paste("\nLength of " , names(chrs_fake_starts)[i-1], " = ", as.numeric(seqlengths(Hsapiens)[[i-1]]), " and the sum of the preceding chromosomes = ", length_sum, ".\n\n", sep=""))
length_sum <- length_sum + as.numeric(seqlengths(Hsapiens)[[i-1]])
chrs_fake_starts[[names(Hsapiens)[i]]] <- length_sum
}
##### Calculate the coordinates for x-axis labels (chr1, chr2...) for ggplot by adding the half-lenght of each chrosomome to its fake start
chrs_fake_label.pos <- vector("list", 24)
chrs_fake_label.pos <- setNames(chrs_fake_label.pos, names(Hsapiens)[1:24] )
for ( i in 1:length(chrs_fake_starts) ) {
chrs_fake_label.pos[[names(Hsapiens)[i]]] <- seqlengths(Hsapiens)[[i]]/2 + chrs_fake_starts[[names(Hsapiens)[i]]]
# cat(paste("\nThe x-axis coordinate for " , names(chrs_fake_starts)[i], " label is ", chrs_fake_label.pos[[names(Hsapiens)[i]]], " = ", seqlengths(Hsapiens)[[i]]/2, " (half-length) + ", chrs_fake_starts[[names(Hsapiens)[i]]]," (fake start)", sep=""))
}
#### Calculate ggplot x-axis coordinates for fusion events
##### Calculate the coordinates to draw bezier curves by adding the fusion events position info to the fake start coordinates of corresponding chromosomes
pos1_fake <- vector("list", nrow(fusion_annot))
pos2_fake <- vector("list", nrow(fusion_annot))
for ( i in 1:nrow(fusion_annot) ) {
# cat(paste("\nCalculations for fusion event: " , paste( chr1[i], pos1[i], sep=" " ), "-", paste( chr2[i], pos2[i], sep=" " ), sep=""))
# cat(paste("\nThe x-axis coordinate for position 1 is ", chrs_fake_starts[[chr1[i]]] + pos1[i], " = ", chrs_fake_starts[[chr1[i]]], " (the fake start of ", chr1[i],") + ", pos1[i], " (the real position 1)", sep=""))
# cat(paste("\nThe x-axis coordinate for position 2 is ", chrs_fake_starts[[chr2[i]]] + pos2[i], " = ", chrs_fake_starts[[chr2[i]]], " (the fake start of ", chr2[i],") + ", pos2[i], " (the real position 2).\n", sep=""))
pos1_fake[[i]] <- chrs_fake_starts[[chr1[i]]] + pos1[i]
pos2_fake[[i]] <- chrs_fake_starts[[chr2[i]]] + pos2[i]
}
##### Get random number for the bezier curves' heigths and caluclate the middle point for each bezier curve
beziers.height <- runif(nrow(fusion_annot), 1, 2)
beziers.mid <- unlist(pos1_fake)+(unlist(pos2_fake)-unlist(pos1_fake))/2
##### Create data-frame with beziers curves info
beziers <- data.frame(
x = c(rbind( unlist(pos1_fake), beziers.mid, unlist(pos2_fake) )),
y = c(rbind( 0.2, beziers.height, 0.2 ) ),
type = rep( paste( chr1, make.names(pos1, unique=TRUE), chr2, make.names(pos2, unique=TRUE), sep="_" ), each=3),
group = rep( type, each=3)
)
##### Generate a bezier curves-like plot representing fusion events
p <- ggplot() + geom_bezier(aes(x= x, y = y, group = type, color = group ), data = beziers, show.legend = TRUE, size = 0.2) +
##### Remove default axes labels and grey backgroud
theme(axis.title.x=element_blank(), axis.text.x= element_blank(), axis.ticks.x=element_blank(), axis.title.y=element_blank(), axis.text.y= element_blank(), axis.ticks.y=element_blank(),
##### ...and the grey backgroud
panel.background = element_rect(fill = NA),
##### ...change the legend parameters
legend.title=element_text(size=6), legend.text=element_text(size=10), legend.key.size = unit(1,"line"), legend.key= element_blank(), legend.position = c(0.85,0.7) ) +
##### Set the axes limits
scale_x_continuous(limits = c(1, genome.length)) +
scale_y_continuous(limits = c(0, 2)) +
##### Add chromosomes boundaries
geom_segment(aes(x = c(1,unlist(chrs_fake_starts)[2:24],genome.length) , xend = c(1,unlist(chrs_fake_starts)[2:24],genome.length), y = 0, yend = 0.2), colour = 'grey', size = 0.2) +
labs( color = "") +
##### Add chromosomes labels
annotate(geom = 'text', label = names(chrs_fake_label.pos), x = unlist(chrs_fake_label.pos), y = 0.1, size = 2, angle = 45)
##### Add gene fusion labels
#annotate(geom = 'text', label = paste(fusion_annot$fusion_data.geneA.name, fusion_annot$fusion_data.geneB.name, sep="-"), x = beziers.mid, y = beziers.height-0.5, size = 1.5)
return( p )
}
##### Load libraries
suppressMessages(library(edgeR))
suppressMessages(library(preprocessCore))
suppressMessages(library(rapportools))
suppressMessages(library(edgeR))
suppressMessages(library(kableExtra))
suppressMessages(library(tidyverse))
suppressMessages(library(knitr))
suppressMessages(library(magick))
suppressMessages(library(matrixStats))
suppressMessages(library(ggplot2))
suppressMessages(library(ggforce))
suppressMessages(library(NOISeq))
suppressMessages(library(package=paste0("EnsDb.Hsapiens.v", params$ensembl_version), character.only = TRUE))
suppressMessages(library(package=paste0("BSgenome.Hsapiens.UCSC.hg", params$ucsc_genome_assembly), character.only = TRUE)) # required to get chromosomes lengts for fusionsBezierPlot function generating Bezier plot to present gene fusions in the genomics context
##### Load reference datasets
##### Define the reference datasets based on user-defined tissue of sample origin
ref_tissue <- list( "pancreas" = c(paste(params$ref_data_dir, "Datasets_list_PDAC.txt", sep="/"), "Normal (pancreas)", "PDAC"), "cervix" = c(paste(params$ref_data_dir, "Datasets_list_cervical_SCC.txt", sep="/"), "Normal (cervix uteri)", "Cervical squamous cell carcinoma") )
tissue <- params$tissue
normal_group <- ref_tissue[[tissue]][2]
cancer_group <- ref_tissue[[tissue]][3]
##### Create a list with reference datasets
ref_datasets <- c(tissue)
ref_datasets.list <- vector("list", length(ref_datasets))
names(ref_datasets.list) <- ref_datasets
##### Create a list with various sets of genes
ref_genes <- c("genes_cancer", "oncokb_genes", "genes_immune")
ref_genes.list <- vector("list", length(ref_genes))
names(ref_genes.list) <- ref_genes
##### Create a list with cancer genes annotations
caner_genes_annot <- c("oncokb_clin_vars", "oncokb_all_vars")
caner_genes_annot.list <- vector("list", length(caner_genes_annot))
names(caner_genes_annot.list) <- caner_genes_annot
##### Get patient data dir and sample file name
dataDir <- unlist(strsplit(params$count_file, split='/', fixed=TRUE))
sampleName <- gsub("-ready.counts", "", dataDir[length(dataDir)])
dataDir <- paste(dataDir[-c(length(dataDir))], collapse ="/")
##### Read in reference datasets and merge them with sample data. This part outputs a vector with first element containing the merged data and second element containing merged targets info
ref_datasets.list[[tissue]] <- combineDatasets(params$sample_name, params$count_file, ref_tissue[[tissue]][1])
names(ref_datasets.list[[tissue]]) <- c("combined_data", "sample_annot")
##### Read in selected genes list
ref_genes.list[["genes_cancer"]] <- read.table(paste(params$ref_data_dir, params$genes_cancer, sep="/"), sep="\t", as.is=TRUE, header=FALSE, row.names=NULL, quote="")[ ,1 ]
ref_genes.list[["oncokb_genes"]] <- read.table(paste(params$ref_data_dir, params$oncokb_genes, sep="/"), sep="\t", as.is=TRUE, header=TRUE, row.names=NULL, quote="", comment.char = "")
ref_genes.list[["genes_immune"]] <- read.table(paste(params$ref_data_dir, params$genes_immune, sep="/"), sep="\t", as.is=TRUE, header=FALSE, row.names=NULL, quote="")
##### Read in mutation data for investigate sample
##### Get the WGS data batch name
batch <- unlist(strsplit(params$batch, split='/', fixed=TRUE))
batch <- batch[length(batch)]
##### Check if PCGR (mutation) output file exists
pcgr_file <- paste(params$batch, "pcgr", paste0(batch, "-somatic.pcgr_acmg.grch37.snvs_indels.tiers.tsv"), sep = "/")
runPcgrChunk <- TRUE
if ( file.exists(pcgr_file) ) {
ref_genes.list[["pcgr"]] <- read.table(pcgr_file, sep="\t", as.is=TRUE, header=TRUE, row.names=NULL, fill = TRUE)
##### Simplify the variants types
ref_genes.list[["pcgr"]]$CONSEQUENCE <- gsub("_variant", "", ref_genes.list[["pcgr"]]$CONSEQUENCE)
ref_genes.list[["pcgr"]]$CONSEQUENCE <- gsub("_", " ", ref_genes.list[["pcgr"]]$CONSEQUENCE)
##### Simplify tiers' annotations and AFs
ref_genes.list[["pcgr"]]$TIER <- gsub("TIER ", "", ref_genes.list[["pcgr"]]$TIER)
ref_genes.list[["pcgr"]]$AF_TUMOR <- round(ref_genes.list[["pcgr"]]$AF_TUMOR, digits = 2)
ref_genes.list[["pcgr"]]$AF_NORMAL <- round(ref_genes.list[["pcgr"]]$AF_NORMAL, digits = 2)
} else {
ref_genes.list[["pcgr"]] <- NULL
runPcgrChunk <- FALSE
}
##### Check if purple (CN) output file exists
purple_file <- paste(params$batch, "purple", paste0(batch, ".purple.gene.cnv"), sep = "/")
runPurpleChunk <- TRUE
if ( file.exists(purple_file) ) {
ref_genes.list[["purple"]] <- read.table(purple_file, sep="\t", as.is=TRUE, header=TRUE, row.names=NULL, fill = TRUE)
} else {
ref_genes.list[["purple"]] <- NULL
runPurpleChunk <- FALSE
}
##### Check if manta (structural variants (SVs)) file exists
sv_file <- paste(params$batch, "structural", paste0(batch, "-sv-prioritize-manta-pass.tsv"), sep = "/")
runSVsChunk <- TRUE
if ( file.exists(sv_file) ) {
ref_genes.list[["sv"]] <- read.table(sv_file, sep="\t", as.is=TRUE, header=TRUE, row.names=NULL, fill = TRUE)
} else {
ref_genes.list[["sv"]] <- NULL
runSVsChunk <- FALSE
}
##### Read in OncoKB (http://oncokb.org) annotations
caner_genes_annot.list[["oncokb_clin_vars"]] <- read.table(paste(params$ref_data_dir, params$oncokb_clin_vars, sep="/"), sep="\t", as.is=TRUE, header=TRUE, row.names=NULL, quote="")
caner_genes_annot.list[["oncokb_all_vars"]] <- read.table(paste(params$ref_data_dir, params$oncokb_all_vars, sep="/"), sep="\t", as.is=TRUE, header=TRUE, row.names=NULL, quote="", fill = TRUE)
##### Read in CIViC (https://civicdb.org/) annotations
caner_genes_annot.list[["civic_var_summaries"]] <- read.table(paste(params$ref_data_dir, params$civic_var_summaries, sep="/"), sep="\t", as.is=TRUE, header=TRUE, row.names=NULL, quote="", fill = TRUE)
caner_genes_annot.list[["civic_clin_evid"]] <- read.table(paste(params$ref_data_dir, params$civic_clin_evid, sep="/"), sep="\t", as.is=TRUE, header=TRUE, row.names=NULL, quote="", fill = TRUE)
##### Read in Cancer Biomarkers database (https://www.cancergenomeinterpreter.org/biomarkers) annotations. This is mainly used to annotate reported fusion events
caner_genes_annot.list[["cancer_biomarkers_trans"]] <- read.table(paste(params$ref_data_dir, params$cancer_biomarkers_trans, sep="/"), sep="\t", as.is=TRUE, header=TRUE, row.names=NULL, quote="", fill = TRUE)
##### Data transformation and filtering
##### For differential expression and related analyses, gene expression is rarely considered at the level of raw counts since libraries sequenced at a greater depth will result in higher counts. Rather, it is common practice to transform raw counts onto a scale that accounts for such library size differences. Here we convert the read count data into log2-counts per million (***log-CPM***) using function from *[edgeR](https://bioconductor.org/packages/release/bioc/html/edgeR.html){target="_blank"}* package. Genes with very low counts across all libraries provide little evidence for differential expression. In the biological point of view, a gene must be expressed at some minimal level before it is likely to be translated into a protein or to be biologically important. In addition, the pronounced discretenes of these counts interferes with some of the statistical approximations that are used later in the pipeline. These genes should be filtered out prior to further analysis.
##### Loop through combined datasets
for ( tissue in names(ref_datasets.list) ) {
counts <- ref_datasets.list[[tissue]][["combined_data"]]
target <- ref_datasets.list[[tissue]][["sample_annot"]]
##### Create EdgeR DGEList object
y <- DGEList(counts=counts, group=target$Target)
##### Add datasets name for each sample
y$samples$dataset <- target$Dataset
##### Filtering to remove low expressed genes. Users should filter with CPM rather than filtering on the counts directly, as the latter does not account for differences in library sizes between samples. Here we keep only genes that have CPM of 1
keep <- rowSums(cpm(y)>1) >= ncol(counts)/10
y.filtered <- y[keep, , keep.lib.sizes=FALSE]
ref_datasets.list[[tissue]][["combined_data_processed"]] <- y.filtered
}
# cat("The CPM of 1 (cut-off for removing low expressed genes) corresponds to", round(min(as.numeric(colSums(counts)*1e-6)), digits=0), "reads in sample with the lowest sequencing depth, and", round(max(as.numeric(colSums(counts)*1e-6)), digits=0), "reads in sample with the greatest sequencing depth\n")
##### Data normalisation
##### During the sample preparation or sequencing process, external factors that are not of biological interest can affect the expression of individual samples. For example, samples processed in the first batch of an experiment can have higher expression overall when compared to samples processed in a second batch. It is assumed that all samples should have a similar range and distribution of expression values. Normalisation for sample-specific effectss is required to ensure that the expression distributions of each sample are similar across the entire experiment. Normalisation by the method of *[trimmed mean of M-values](https://www.ncbi.nlm.nih.gov/pubmed/20196867){target="_blank"} (TMM)* is performed using the *calcNormFactors* function in *[edgeR](https://bioconductor.org/packages/release/bioc/html/edgeR.html){target="_blank"}*. The normalisation factors calculated here are used as a scaling factor for the library sizes. TMM is the recommended for most RNA-Seq data where the majority (more than half) of the genes are believed not differentially expressed between any pair of the samples.
##### Adjust for RNA composition effect. Calculate scaling factors for the library sizes with calcNormFactors function using trimmed mean of M-values (TMM) between each pair of samples. Note, that the raw read counts are used to calculate the normalisation factors
##### Loop through combined datasets
for ( tissue in names(ref_datasets.list) ) {
y.filtered <- ref_datasets.list[[tissue]][[3]]
y.filtered.norm <- calcNormFactors(y.filtered, method = "TMM")
##### Transformations from the raw-scale to CPM
y.filtered.norm.cpm <- cpm(y.filtered.norm, normalized.lib.sizes=TRUE, log=TRUE, prior.count=0.25)
ref_datasets.list[[tissue]][[3]] <- y.filtered.norm.cpm
}
##### Principal component analysis (PCA)
##### Loop through combined datasets and perform PCA
for ( tissue in names(ref_datasets.list) ) {
target <- ref_datasets.list[[tissue]][["sample_annot"]]
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
ref_datasets.list[[tissue]][["pca"]] <- pca(data, target)
}
##### Loop through combined, BUT NOT PROCESSED, datasets and annotate ALL genes. This part is mainly required for biotype detection step
for ( tissue in names(ref_datasets.list) ) {
##### Convert data into a data frame to make the Ensembl ID and gene symbol matches (with merge function)
data <- ref_datasets.list[[tissue]][["combined_data"]]
data.df <- as.data.frame(cbind(rownames(data), data))
colnames(data.df)[1] <- "ENSEMBL"
##### Get genes annotation and genomic locations (Use Ensembl annotation version 86 (Oct 2016), the most recent as of Oct 2018)
edb <- EnsDb.Hsapiens.v86
##### Get keytypes for gene SYMBOL
keys <- keys(edb, keytype="GENEID")
##### Get genes genomic coordiantes
gene_info <- ensembldb::select(edb, keys=keys, columns=c("GENEID", "GENEBIOTYPE", "GENENAME", "SEQNAME", "GENESEQSTART", "GENESEQEND"), keytype="GENEID")
names(gene_info) <- gsub("GENEID", "ENSEMBL", names(gene_info))
names(gene_info) <- gsub("GENENAME", "SYMBOL", names(gene_info))
##### Limit genes annotation to those genes for which sample expression measurments are available
gene_info <- gene_info[ gene_info$ENSEMBL %in% data.df$ENSEMBL, ]
##### Remove rows with duplicated ENSEMBL IDs
gene_info = gene_info[!duplicated(gene_info$ENSEMBL),]
rownames(gene_info) <- gene_info$ENSEMBL
##### Remove rows with duplicated gene symbols (Y_RNAs, SNORs, LINC0s etc)
gene_info = gene_info[!duplicated(gene_info$SYMBOL),]
##### Merge genes genomic coordinates info with their annotation and expression data
data.annot <- merge(gene_info, data.df, by = "ENSEMBL", all.x = FALSE)
rownames(data.annot) <- data.annot$ENSEMBL
##### Get data matrix with gene symbols
ref_datasets.list[[tissue]][["gene_annot_all"]] <- data.annot[, c("SYMBOL", "GENEBIOTYPE", "ENSEMBL", "SEQNAME", "GENESEQSTART", "GENESEQEND")]
}
##### Loop through combined datasets and annotate genes
for ( tissue in names(ref_datasets.list) ) {
##### Convert data into a data frame to make the Ensembl ID and gene symbol matches (with merge function)
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
data.df <- as.data.frame(cbind(rownames(data), data))
colnames(data.df)[1] <- "ENSEMBL"
##### Merge genes genomic coordinates info with their annotation and expression data
data.annot <- merge(gene_info, data.df, by = "ENSEMBL", all.x = FALSE)
##### Keep only genes fo which gene symbol is available
data.annot <- data.annot[!(is.na(data.annot$SYMBOL) | data.annot$SYMBOL==""), ]
rownames(data.annot) <- data.annot$SYMBOL
##### Get data matrix with gene symbols
#data <- data.annot[, colnames(data)]
#data <- apply(data.annot[, colnames(data)], 2, as.numeric)
#rownames(data) <- data.annot$SYMBOL
ref_datasets.list[[tissue]][["combined_data_processed"]] <- apply(data.annot[, colnames(data)], 2, as.numeric)
rownames(ref_datasets.list[[tissue]][["combined_data_processed"]]) <- data.annot$SYMBOL
ref_datasets.list[[tissue]][["gene_annot"]] <- data.annot[, c("SYMBOL", "GENEBIOTYPE", "ENSEMBL", "SEQNAME", "GENESEQSTART", "GENESEQEND")]
}
##### Combine PMCC gene panel and OncoKB cancer genes
genes_cancer <- ref_genes.list[["oncokb_genes"]]
genes_cancer$PMCC <- rep("No", nrow(genes_cancer))
for ( gene in unlist(ref_genes.list[["genes_cancer"]]) ) {
##### Check if the PMCC genes is aleady reported in OncoKB
if ( gene %in% genes_cancer$Hugo.Symbol ) {
genes_cancer[ genes_cancer$Hugo.Symbol==gene, ]$PMCC <- "Yes"
genes_cancer[ genes_cancer$Hugo.Symbol==gene, 2] <- as.numeric(genes_cancer[ genes_cancer$Hugo.Symbol==gene, 2]) + 1
##### Add if not present
} else {
genes_cancer <- rbind(genes_cancer, c(gene, 1, "No", rep("", 8), "Yes"))
}
}
rownames(genes_cancer) <- genes_cancer$Hugo.Symbol
names(genes_cancer) <- c("Gene", "No. of resources", "OncoKB", "Oncogene", "TSG", "MSK-IMPACT", "MSK-HEME", "Foundation One", "Foundation One Heme", "Vogelstein", "Sanger CGC", "PMCC")
genes_cancer <- genes_cancer[,c("Oncogene", "TSG", "No. of resources", "OncoKB", "MSK-IMPACT", "MSK-HEME", "Foundation One", "Foundation One Heme", "Vogelstein", "Sanger CGC", "PMCC")]
genes_cancer[ genes_cancer=="No" ] <- "-"
genes_cancer[ genes_cancer=="" ] <- "-"
ref_genes.list[["genes_cancer"]] <- genes_cancer
ref_genes.list[["oncokb_genes"]] <- genes_cancer[ rownames(genes_cancer) %in% ref_genes.list[["oncokb_genes"]]$Hugo.Symbol, ]
##### Combine expression data with mutation and CN data if available
cn_data <- ref_genes.list[["purple"]]
expr_data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
##### Extract partient sample data
expr_data <- expr_data[, ncol(expr_data)]
##### Perform Z-score transformation of the expression values
expr_data.z <- as.vector(scale(expr_data))
names(expr_data.z) <- names(expr_data)
##### Remove entries with missing gene symbol (mainly variants in intergenic regions)
cn_data <- cn_data[ cn_data$Gene %!in% "", ]
##### Calculate the mean CN for each gene
cn_data$MeanCopyNumber <- rowMeans(cbind(cn_data$MinCopyNumber, cn_data$MaxCopyNumber))
##### Deal with negative CN values
cn_data$MeanCopyNumber[ cn_data$MeanCopyNumber < 0 ] <- 0
##### Get the percentiles from from the CN values
#cn_data.percent <- quantile(cn_data$MeanCopyNumber, probs = seq(0, 1, .05), na.rm = TRUE)
# ##### Draw histogram of CN data
# p <- plot_ly(x = cn_data$MeanCopyNumber, type = 'histogram', name = "Copy-number data", width = 800, height = 500) %>%
#
# ##### Add 10th percentile threshold
# add_lines(y = seq(0,1000, 100), x = rep(cn_data.percent[2],11),
# line = list(color = "black", dash = "dash"), opacity = 0.4,
# name = "10th percentile", showlegend = TRUE) %>%
#
# ##### Add 50th percentile
# add_lines(y = seq(0,1000, 100), x = rep(cn_data.percent[11],11),
# line = list(color = "black", dash = "dash"), opacity = 0.7,
# name = "50th percentile", showlegend = TRUE) %>%
#
# ##### Add 90th percentile threshold
# add_lines(y = seq(0,1000, 100), x = rep(cn_data.percent[20],11),
# line = list(color = "black", dash = "dash"), opacity = 1,
# name = "80th percentile", showlegend = TRUE) %>%
#
# layout(xaxis = list( range=c(0,3), title = "Copy-number values"), yaxis = list( title = "Frequency"), margin = list(l=50, r=50, b=50, t=50, pad=4), autosize = F)
##### Keep only altered genes with CN values below 10th percentile and above 90th percentile
#cn_data <- cn_data[ cn_data$MaxCopyNumber < cn_data.percent[2] | cn_data$MinCopyNumber > cn_data.percent[20], ]
##### Keep only altered genes with CN values below loss threshold (default CN value = 1) and above gain threshold (default CN value = 4)
cn_data <- cn_data[ cn_data$MeanCopyNumber < params$cn_loss | cn_data$MeanCopyNumber > params$cn_gain, ]
##### Add mutation data if available
if ( !is.null(ref_genes.list[["pcgr"]]) ) {
mut_data <- ref_genes.list[["pcgr"]]
##### Remove entries with missing gene symbol (mainly variants in intergenic regions)
mut_data <- mut_data[ mut_data$SYMBOL %!in% "", ]
##### Prepare mutation data to include multiple mutations per gene
##### Initiate variable for the gene mutation status for each gene
gene.mut <- as.matrix(rep("not mutated", length(expr_data.z)))
colnames(gene.mut) <- "Mutation"
rownames(gene.mut) <- names(expr_data.z)
for ( i in 1:nrow(gene.mut) ) {
##### Check if any mutations are reported for each gene
if ( rownames(gene.mut)[i] %in% mut_data$SYMBOL ) {
##### Deal with multiple mutations per gene
if ( length(mut_data[ mut_data$SYMBOL %in% rownames(gene.mut)[i], ]$CONSEQUENCE) > 1 ) {
gene.mut[ rownames(gene.mut)[i],"Mutation" ] <- "multiple hits"
} else {
gene.mut[ rownames(gene.mut)[i],"Mutation" ] <- mut_data[ mut_data$SYMBOL %in% rownames(gene.mut)[i], ]$CONSEQUENCE
}
}
}
##### If there is no expression value for a specific gene than assume it's not expressed at all and assign the lowest value observed in that sample
for ( gene in unique(mut_data$SYMBOL) ) {
if ( gene %!in% rownames(gene.mut) ) {
expr_data.z <- c(expr_data.z, min(expr_data.z))
names(expr_data.z)[length(expr_data.z)] <- gene
##### Deal with multiple mutations per gene
if ( length(mut_data[ mut_data$SYMBOL %in% gene, ]$CONSEQUENCE) > 1 ) {
gene.mut <- rbind( gene.mut, "multiple hits")
} else {
gene.mut <- rbind( gene.mut, mut_data[ mut_data$SYMBOL %in% gene, ]$CONSEQUENCE )
}
rownames(gene.mut)[nrow(gene.mut)] <- gene
}
}
##### Subset expression, mutation and copy-number data to include only overlapping genes
genes.intersect <- intersect(intersect(rownames(gene.mut), cn_data$Gene), names(expr_data.z))
gene.mut.sub <- gene.mut[ rownames(gene.mut) %in% genes.intersect, ]
cn_data.sub <- cn_data[ cn_data$Gene %in% genes.intersect, ]
expr_data.z.sub <- expr_data.z[ names(expr_data.z) %in% genes.intersect ]
##### Make sure thay are all in the same order
gene.mut.sub <- gene.mut.sub[ genes.intersect ]
rownames(cn_data.sub) <- cn_data.sub$Gene
cn_data.sub <- cn_data.sub[ genes.intersect, ]
expr_data.z.sub <- expr_data.z.sub[ genes.intersect ]
##### Prepare data frame
ref_datasets.list[[tissue]][["expr_mut_cn_data"]] <- data.frame(names(expr_data.z.sub), cn_data.sub$MeanCopyNumber, expr_data.z.sub, gene.mut.sub)
colnames(ref_datasets.list[[tissue]][["expr_mut_cn_data"]]) <- c("Gene", "CN", "mRNA", "Mutation")
} else {
##### Skip the step for processing mutation info and deal with expression and copy-number data
##### Subset expression and copy-number data to include only overlapping genes
genes.intersect <- intersect(cn_data$Gene, names(expr_data.z))
cn_data.sub <- cn_data[ cn_data$Gene %in% genes.intersect, ]
expr_data.z.sub <- expr_data.z[ names(expr_data.z) %in% genes.intersect ]
##### Make sure thay are all in the same order
rownames(cn_data.sub) <- cn_data.sub$Gene
cn_data.sub <- cn_data.sub[ genes.intersect, ]
expr_data.z.sub <- expr_data.z.sub[ genes.intersect ]
##### Prepare data frame
ref_datasets.list[[tissue]][["expr_mut_cn_data"]] <- data.frame(names(expr_data.z.sub), cn_data.sub$MeanCopyNumber, expr_data.z.sub)
colnames(ref_datasets.list[[tissue]][["expr_mut_cn_data"]]) <- c("Gene", "CN", "mRNA")
}
#read in the pizzly fusion calls
pizzly.fusions <- read.table(file = paste(dataDir, "pizzly", paste0(sampleName, "-flat.tsv"), sep = "/"), header = TRUE)
quant <- read.table(file = paste(dataDir, "kallisto/quant_pizzly_post/abundance.tsv", sep = "/"), header = TRUE)
#sort and filter quantification file on tpm values. First, grep only the transcript ids for fusion genes from quantification #file. Currently filtering on quantiles. Selected 0.997 because that reduces the #final fusion calls to the value we are #interested in (~15)
quant.fusions.only.transcripts <- quant[grep(":", quant$target_id), ]
quant.sorted.filtered <- dplyr::filter(arrange(quant.fusions.only.transcripts, desc(quant.fusions.only.transcripts$tpm)), tpm >= (quantile(quant.fusions.only.transcripts$tpm, 0.99)))
#initialize an empty dataframe
result <- data.frame()
#let's try using for loop for iterating over pizzly.fusions dataframe and get transcriptID and fusion gene pair information.
#can also filter quant.sorted.filtered$target_id to have only fusion gene target ids (that is two transcripts instead of one-
#this will increase speed
for (row in 1:nrow(pizzly.fusions)){
y <- strsplit(as.character(pizzly.fusions[row, 7]), "\\;")
y <- unname(y)
for (i in 1:length(y[[1]])){
if (y[[1]][i] %in% quant.sorted.filtered$target_id){
#creating a new dataframe for the filtered pizzly results
result <- rbind(result, data.frame(pizzly.fusions[row,]))
}
}
}
#remove duplicated values from result filtered using expression count (as multiple transcripts might support fusion between same gene) and sort the results by number of events (first by split count and then paircount)
deduped.result <- unique(result)
idx <- order(deduped.result$splitcount, deduped.result$paircount, decreasing = TRUE)
deduped.sorted.result <- deduped.result[idx, ]
#Extract only those fusion genes that are in cancer genes list
result.cancer_genes <- data.frame()
for (row in 1:nrow(pizzly.fusions)){
if(pizzly.fusions[row,1] %in% rownames(ref_genes.list[["genes_cancer"]]) | pizzly.fusions[row,3] %in% rownames(ref_genes.list[["genes_cancer"]])) {
#creating a new dataframe for extracting pizzly rows with cancer gene hits
result.cancer_genes <- rbind(result.cancer_genes, data.frame(pizzly.fusions[row,]))
}
}
#ordering pizzly's cancer genes results on the basis of read count values
idx2 <- order(result.cancer_genes$splitcount, result.cancer_genes$paircount, decreasing = TRUE)
result.sorted.cancer_genes <- result.cancer_genes[idx2,]
#extracting rows from pizzly results that are not in re-quant i.e. deduped.sorted.result or cancer genes list i.e. result.sorted.cancer_genes
result.other_genes <- pizzly.fusions[ rownames(pizzly.fusions) %!in% c(rownames(result.sorted.cancer_genes), rownames(deduped.sorted.result)), ]
#ordering pizzly's other genes results on the basis of read count values
idx3 <- order(result.other_genes$splitcount, result.other_genes$paircount, decreasing = TRUE)
result.sorted.other_genes <- result.other_genes[idx3,]
#combing all the three above sorted dataframes
pizzly.fusions2 <- rbind(deduped.sorted.result, result.sorted.cancer_genes, result.sorted.other_genes)
##### Flag known fusions based on info from Cancer Biomarkers database (https://www.cancergenomeinterpreter.org/biomarkers)
known_translocations <- caner_genes_annot.list[["cancer_biomarkers_trans"]]
known_translocations$cancer_acronym <- gsub(";", ", ", known_translocations$cancer_acronym)
known_translocations$source <- gsub(";", ", ", known_translocations$source)
##### Extract gene pairs involved in reported gene fusions
trans.pairs <- strsplit(known_translocations$translocation, split='__', fixed=TRUE)
trans.pairs <- data.frame(matrix(unlist(trans.pairs), nrow=length(trans.pairs), byrow=TRUE), stringsAsFactors = FALSE)
names(trans.pairs) <- c("geneA", "geneB")
known_translocations <- cbind(known_translocations, trans.pairs)
trans.pairs <- apply( trans.pairs , 1 , paste , collapse = "" )
##### Add columns for info about reported fusions
pizzly.fusions2 <- cbind(pizzly.fusions2, data.frame(matrix("", ncol = 4, nrow = nrow(pizzly.fusions2)), stringsAsFactors = FALSE))
colnames(pizzly.fusions2)[-c(1:7)] <- c("Reported", "Effector_gene", "Source", "Cancer_acronym")
##### Add annotations about known fusion events
##### Loop through all genes involved in reported gene fusions and check if present in pizzly results
for ( i in 1:nrow(pizzly.fusions2) ) {
geneA <- pizzly.fusions2$geneA.name[i]
geneB <- pizzly.fusions2$geneB.name[i]
##### First check if the exact reported gene pairs were detected by pizzly
if ( paste(geneA, geneB, sep="-") %in% trans.pairs ) {
pizzly.fusions2$Reported[i] <- trans.pairs[ trans.pairs %in% paste(geneA, geneB, sep="-") ]
pizzly.fusions2[ i, c("Effector_gene", "Cancer_acronym", "Source")] <- known_translocations[ trans.pairs %in% paste(geneA, geneB, sep="-"), c("effector_gene", "cancer_acronym", "source") ]
} else if ( paste(geneB, geneA, sep="-") %in% trans.pairs ) {
pizzly.fusions2$Reported[i] <- trans.pairs[ trans.pairs %in% paste(geneB, geneA, sep="-") ]
pizzly.fusions2[ i, c("Effector_gene", "Cancer_acronym", "Source")] <- known_translocations[ trans.pairs %in% paste(geneB, geneA, sep="-"), c("effector_gene", "cancer_acronym", "source") ]
##### Now check is at lease one of the pizzly results genes was previously reporeted
} else {
##### Check pizzly genes A and genes A in reported fusions
if ( geneA %in% known_translocations$geneA ) {
known_gene <- known_translocations$geneA %in% geneA
pizzly.fusions2$Reported[i] <- unique(known_translocations$geneA[ known_gene ])
pizzly.fusions2[ i, c("Cancer_acronym", "Source")] <- known_translocations[ known_gene, c("cancer_acronym", "source") ]
##### Flag if it's effector gene
if ( geneA == known_translocations[ known_gene , "effector_gene" ] ) {
pizzly.fusions2[ i, "Effector_gene"] <- as.character(geneA)
}
##### Check pizzly genes A and genes B in reported fusions
} else if ( geneA %in% known_translocations$geneB ) {
known_gene <- known_translocations$geneB %in% geneA
pizzly.fusions2$Reported[i] <- unique(known_translocations$geneB[ known_gene ])
pizzly.fusions2[ i, c("Cancer_acronym", "Source")] <- known_translocations[ known_gene, c("cancer_acronym", "source") ]
##### Flag if it's effector gene
if ( geneA == known_translocations[ known_gene , "effector_gene" ] ) {
pizzly.fusions2[ i, "Effector_gene"] <- as.character(geneA)
}
}
##### Check pizzly genes B and genes A in reported fusions
if ( geneB %in% known_translocations$geneA ) {
known_gene <- known_translocations$geneA %in% geneB
pizzly.fusions2$Reported[i] <- unique(known_translocations$geneA[ known_gene ])
pizzly.fusions2[ i, c("Cancer_acronym", "Source")] <- known_translocations[ known_gene, c("cancer_acronym", "source") ]
##### Flag if it's effector gene
if ( geneB == known_translocations[ known_gene , "effector_gene" ] ) {
pizzly.fusions2[ i, "Effector_gene"] <- as.character(geneB)
}
##### Check pizzly genes B and genes B in reported fusions
} else if ( geneB %in% known_translocations$geneB ) {
known_gene <- known_translocations$geneB %in% geneB
pizzly.fusions2$Reported[i] <- unique(known_translocations$geneB[ known_gene ])
pizzly.fusions2[ i, c("Cancer_acronym", "Source")] <- known_translocations[ known_gene, c("cancer_acronym", "source") ]
##### Flag if it's effector gene
if ( geneB == known_translocations[ known_gene , "effector_gene" ] ) {
pizzly.fusions2[ i, "Effector_gene"] <- as.character(geneB)
}
}
}
}
##### Rearrange the table to move the transcripts list at the end
pizzly.fusions2 <- pizzly.fusions2 %>% dplyr::select(-"transcripts.list","transcripts.list")
##### Index duplicated rows = these are fusions which involve both highly abundant transcripts and cancer genes and so are duplicated in pizzly.fusions2 data frame
dup.index <- which( duplicated(pizzly.fusions2) | duplicated(pizzly.fusions2[nrow(pizzly.fusions2):1, ])[nrow(pizzly.fusions2):1] )
##### Add column indicating fusions containing high abundant transcripts and known cancer genes
fusions_abundant <- c(rep("-", nrow(pizzly.fusions2)))
fusions_abundant[ c(1:nrow(deduped.result)) ] <- "Yes"
fusions_cancer <- c(rep("-", nrow(pizzly.fusions2)))
fusions_cancer[ c((nrow(deduped.result)+1):(nrow(deduped.result) + nrow(result.cancer_genes))) ] <- "Yes"
pizzly.fusions3 <- cbind(pizzly.fusions2, fusions_abundant, fusions_cancer)
##### Indicate duplicated rows and flag them as both, those which involve both highly abundant transcripts and cancer genes
pizzly.fusions3$fusions_abundant[ dup.index ] <- "Yes"
pizzly.fusions3$fusions_cancer[ dup.index ] <- "Yes"
##### Remove duplicated rows
if ( any(duplicated(pizzly.fusions3)) ) {
pizzly.fusions3 <- pizzly.fusions3[ -c(which( duplicated(pizzly.fusions3) )), ]
}
##### Re-order data frame to prioritise fusions with highly abundant transcripts and cancer genes
pizzly.fusions3 <- pizzly.fusions3[ rev(with(pizzly.fusions3, order(fusions_abundant, fusions_cancer))), ]
##### Get the number of reads supporting fusions of interest
result_reads <- data.frame()
for (row in 1:nrow(deduped.result)){
y <- strsplit(as.character(deduped.result[row, 7]), "\\;")
y <- unname(y)
for (i in 1:length(y[[1]])){
if (y[[1]][i] %in% quant.sorted.filtered$target_id){
#creating a new dataframe for the reads supporting individual transcript ID for each filtered fusion gene pair
result_reads_inter <- data.frame(deduped.result[row,1:6])
result_reads_inter$transcriptID = y[[1]][i]
tpm = quant.sorted.filtered[grep(y[[1]][i], quant.sorted.filtered$target_id), ]$tpm
result_reads_inter$tpm = tpm
result_reads <- rbind(result_reads, result_reads_inter)
}
}
}
##### Rearrange the table to move the transcripts list at the end
result_reads <- result_reads %>% dplyr::select(-"transcriptID","transcriptID")
##### Annotate fusion genes
##### Get data to annotate fusion genes
fusion_genes_annot <- ref_datasets.list[[tissue]][["gene_annot_all"]][ , c("ENSEMBL", "SYMBOL", "SEQNAME", "GENESEQSTART", "GENESEQEND") ]
##### Keep only fusions for which both genes have gene symbol (and genomics location) available
pizzly.fusions3 <- pizzly.fusions3[ pizzly.fusions3$geneA.id %in% fusion_genes_annot$ENSEMBL, ]
pizzly.fusions3 <- pizzly.fusions3[ pizzly.fusions3$geneB.id %in% fusion_genes_annot$ENSEMBL, ]
pizzly.fusions.annot <- pizzly.fusions3
pizzly.fusions.annot$order <- 1:nrow(pizzly.fusions.annot)
##### Get genomic info for fusions genes
fusion_annot1 <- merge(fusion_genes_annot, pizzly.fusions.annot[ , c("geneA.id", "order")], by = 1, sort=FALSE)
fusion_annot1 <- fusion_annot1[ order(fusion_annot1$order), ]
fusion_annot2 <- merge(fusion_genes_annot, pizzly.fusions.annot[ , c("geneB.id", "order")], by = 1, sort=FALSE)
fusion_annot2 <- fusion_annot2[ order(fusion_annot2$order), ]
fusion_annot <- cbind(fusion_annot1, fusion_annot2, pizzly.fusions.annot$fusions_abundant, pizzly.fusions.annot$fusions_cancer)
colnames(fusion_annot) = make.names(colnames(fusion_annot), unique=TRUE)
##### Create a list of known fusion genes
fusion_annot_reported.geneA <- known_translocations[ , c("effector_gene", "geneA")]
fusion_annot_reported.geneB <- known_translocations[ , c("effector_gene", "geneB")]
names(fusion_annot_reported.geneA) <- gsub( "geneA", "Reported fusion", names(fusion_annot_reported.geneA))
names(fusion_annot_reported.geneB) <- gsub( "geneB", "Reported fusion", names(fusion_annot_reported.geneB))
##### Combine reported fusion genes A and B
fusion_annot_reported <- rbind(fusion_annot_reported.geneA, fusion_annot_reported.geneB)
names(fusion_annot_reported) <- gsub( "effector_gene", "Effector gene", names(fusion_annot_reported))
fusion_annot_reported <- fusion_annot_reported[ , c( "Reported fusion", "Effector gene")]
##### Check which fusion genes are also effector genes
for( i in 1:nrow(fusion_annot_reported) ) {
if ( fusion_annot_reported$"Effector gene"[i] == fusion_annot_reported$"Reported fusion"[i] ) {
fusion_annot_reported$"Effector gene"[i] <- "Yes"
} else {
fusion_annot_reported$"Effector gene"[i] <- "No"
}
}
##### Remove duplicated entries (in case where same gene was listed as geneA and geneB)
fusion_annot_reported <- fusion_annot_reported[ !duplicated(fusion_annot_reported$"Reported fusion") , ]
rownames(fusion_annot_reported) <- fusion_annot_reported$"Reported fusion"
fusion_annot_reported$"Reported fusion" <- "Yes"
##### Summarise the reference cohorts samples
target <- ref_datasets.list[[tissue]][["sample_annot"]]
ref_normal <- table(target$Target)[names(table(target$Target))==normal_group]
ref_cancer <- table(target$Target)[names(table(target$Target))==cancer_group]
Summary
The following reference patient cohorts were used for the analysis:
Out of the 52683 input genes:
NOTE: the 34370 genes with no/low expression are indicated in BLANK cells with missing values in expression summary tables in Mutated genes, Cancer genes, Copy-number altered genes and Immune Response Markers sections.
##### Use NOISeq package () https://bioconductor.org/packages/release/bioc/vignettes/NOISeq/inst/doc/NOISeq.pdf ) to perfrom biotype detection analysis
##### Get the data and genes annotation info
data <- ref_datasets.list[[tissue]][["combined_data"]]
targets <- ref_datasets.list[[tissue]][["sample_annot"]][, c("Dataset", "Target")]
targets$Target[targets$Target==normal_group] <- "Normal"
targets$Target[targets$Target==params$sample_name] <- "Patient"
targets$Target <- as.factor(targets$Target)
##### Use the genes annotation to define gene biotypes
data_annot <- ref_datasets.list[[tissue]][["gene_annot_all"]]
##### Keep only genes for which annotation is available
gene_biotypes <- data_annot$GENEBIOTYPE
names(gene_biotypes) <- rownames(data_annot)
##### Convert data into a NOISeq object
NOISeq_data <- readData( data = data, biotype = gene_biotypes, factors = targets)
##### Perfrom biotype detection analysis
NOISeq_biodetection <- NOISeq::dat(NOISeq_data, k = 0, type = "biodetection", factor = "Target")
##### Compute expression distribution per biotype
NOISeq_countsbio = NOISeq::dat(NOISeq_data, factor = "Target", type = "countsbio")
##### Generate per-sample biodetection plot
for ( sample in c("Patient", cancer_group, "Normal") ) {
explo.plot(NOISeq_biodetection, samples = sample, plottype = "persample", verbose=FALSE)
}
##### Generate per-sample biodetection plot
for ( sample in c("Patient", cancer_group, "Normal") ) {
explo.plot(NOISeq_countsbio, samples = sample, plottype = "boxplot", norm = FALSE)
}
##### Generate cross-groups biodetection plot (patient vs normal)
explo.plot(NOISeq_biodetection, samples = c(2, 1), plottype = "comparison", toplot = "protein_coding")
##### Generate cross-groups biodetection plot (patient vs cancer)
explo.plot(NOISeq_biodetection, samples = c(2, 3), plottype = "comparison", toplot = "protein_coding")
##### Generate cross-groups biodetection plot (cancer vs normal)
explo.plot(NOISeq_biodetection, samples = c(3, 1), plottype = "comparison", toplot = "protein_coding")
mRNA expression levels of genes containing single nucleotide variants (SNVs) or insertions/deletions (indels), obtained from the PCGR report, in patient’s sample and their average mRNA expression in samples from cancer patients and healthy individuals. NOTE, only PCGR tier 1-3 variants are reported.
PCGR report for this sample IS AVAILABLE.
Out of the 101 mutated genes 0 include tier 1-3 variants. Of these, the expression of 0 was reliably measured in patient’s sample. The remaining 0 genes are either not expressed or their expression level is too low to be detected (indicated in BLANK cells with missing values).
##### Generate expression summary table for cancer genes from OncoKB and PMCC
targets <- ref_datasets.list[[tissue]][["sample_annot"]]
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
##### Consider only genes with mutations calssified within Tiers 1-3
genes <- unique(ref_genes.list[["pcgr"]][ ref_genes.list[["pcgr"]]$TIER %in% c("1", "2", "3" ), ]$SYMBOL)
##### Deal with no genes
if ( length(genes) == 0 ) {
genes <- NULL
}
mut_genes.expr.z <- exprTable( genes = genes, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group , cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], mut_annot = ref_genes.list[["pcgr"]][, c("SYMBOL", "TIER", "CONSEQUENCE", "VARIANT_CLASS", "AF_TUMOR", "GENOMIC_CHANGE", "PROTEIN_CHANGE")], ext_links = TRUE, type = "z")
##### Present the expression summary table
mut_genes.expr.z[[1]]
Table legend
The RED colour range indicate relatively high expression (Z-score) values and BLUE colour range indicate relatively low expression (Z-score) values in individual sample group. The BLANK cells with missing values indicate genes with no/low expression. The Patient vs Normal column illustrates the difference between Z-scores in patient sample and cohort of healthy individuals for each cancer gene. Variants’ tier, consequence, class and tumour allele freuqnecy (AF), as well as genomic and protein change are also provided based on information from PCGR report. In case of multiple varaints detected in single gene the variant with the lowest tier is reported and other potential consequences are listed in column CONSEQUENCE_OTHER. Genes are ordered by increasing variants TIER. SD - standard deviation
mut_genes.expr.perc <- exprTable( genes = genes, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group , cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], mut_annot = ref_genes.list[["pcgr"]][, c("SYMBOL", "TIER", "CONSEQUENCE", "VARIANT_CLASS", "AF_TUMOR", "GENOMIC_CHANGE", "PROTEIN_CHANGE")], ext_links = TRUE, type = "perc")
##### Present the expression summary table
mut_genes.expr.perc[[1]]
Table legend
The RED colour range indicate relatively high expression (percentile) values and BLUE colour range indicate relatively low expression (percentile) values in individual sample group. The BLANK cells with missing values indicate genes with no/low expression. The Patient vs Normal column illustrates the difference between percentiles in patient sample and cohort of healthy individuals for each cancer gene. Variants’ tier, consequence, class and tumour allele freuqnecy (AF), as well as genomic and protein change are also provided based on information from PCGR report. In case of multiple varaints detected in single gene the variant with the lowest tier is reported and other potential consequences are listed in column CONSEQUENCE_OTHER. Genes are ordered by increasing variants TIER. SD - standard deviation
Expression profiles for the top 10 mutated genes with variants annotated with the lowest tier and demonstrating the greatest difference in standarised (Z-score) mRNA expression values between patient’s sample and the average mRNA expression in samples from healthy individuals.
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_genes.expr.z[[2]]$SYMBOL[1]
if ( !is.na(gene) && gene %in% rownames(data) ) {
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are", length(mut_genes.expr.z[[2]]$SYMBOL), "mutated genes that include tier 1-3 variants!", sep=" "))
}
There are 0 mutated genes that include tier 1-3 variants!
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_genes.expr.z[[2]]$SYMBOL[2]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are", length(mut_genes.expr.z[[2]]$SYMBOL), "mutated genes that include tier 1-3 variants!", sep=" "))
}
There are 0 mutated genes that include tier 1-3 variants!
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_genes.expr.z[[2]]$SYMBOL[3]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are", length(mut_genes.expr.z[[2]]$SYMBOL), "mutated genes that include tier 1-3 variants!", sep=" "))
}
There are 0 mutated genes that include tier 1-3 variants!
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_genes.expr.z[[2]]$SYMBOL[4]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are", length(mut_genes.expr.z[[2]]$SYMBOL), "mutated genes that include tier 1-3 variants!", sep=" "))
}
There are 0 mutated genes that include tier 1-3 variants!
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_genes.expr.z[[2]]$SYMBOL[5]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are", length(mut_genes.expr.z[[2]]$SYMBOL), "mutated genes that include tier 1-3 variants!", sep=" "))
}
There are 0 mutated genes that include tier 1-3 variants!
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_genes.expr.z[[2]]$SYMBOL[6]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are", length(mut_genes.expr.z[[2]]$SYMBOL), "mutated genes that include tier 1-3 variants!", sep=" "))
}
There are 0 mutated genes that include tier 1-3 variants!
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_genes.expr.z[[2]]$SYMBOL[7]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are", length(mut_genes.expr.z[[2]]$SYMBOL), "mutated genes that include tier 1-3 variants!", sep=" "))
}
There are 0 mutated genes that include tier 1-3 variants!
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_genes.expr.z[[2]]$SYMBOL[8]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are", length(mut_genes.expr.z[[2]]$SYMBOL), "mutated genes that include tier 1-3 variants!", sep=" "))
}
There are 0 mutated genes that include tier 1-3 variants!
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_genes.expr.z[[2]]$SYMBOL[9]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are", length(mut_genes.expr.z[[2]]$SYMBOL), "mutated genes that include tier 1-3 variants!", sep=" "))
}
There are 0 mutated genes that include tier 1-3 variants!
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_genes.expr.z[[2]]$SYMBOL[10]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are", length(mut_genes.expr.z[[2]]$SYMBOL), "mutated genes that include tier 1-3 variants!", sep=" "))
}
There are 0 mutated genes that include tier 1-3 variants!
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
mRNA expression levels of cancer genes in patient’s sample and their average mRNA expression in samples from cancer patients and healthy individuals. These include Peter Mac comprehensive cancer (PMCC) panel genes and genes considered to be cancer genes (listed here and available here) by OncoKB based on their inclusion in the following sequencing panels: MSK-IMPACT, MSK-HEME, Foundation One, Foundation One Heme, Vogelstein and Sanger Cancer Gene Census (CGC).
Out of the 1410 cancer genes the expression of 1245 was reliably measured in patient’s sample. The remaining 165 genes are either not expressed or their expression level is too low to be detected (indicated in BLANK cells with missing values).
##### Generate expression summary table for cancer genes from OncoKB and PMCC
targets <- ref_datasets.list[[tissue]][["sample_annot"]]
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
cancer_genes.expr.z <- exprTable( genes = rownames(ref_genes.list[["genes_cancer"]]), data = data, targets = targets, sampleName = params$sample_name, normal = normal_group , cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], OncoKB_genes = ref_genes.list[["oncokb_genes"]], ext_links = TRUE, type = "z")
##### Present the expression summary table
cancer_genes.expr.z[[1]]
Table legend
The RED colour range indicate relatively high expression (Z-score) values and BLUE colour range indicate relatively low expression (Z-score) values in individual sample group. The BLANK cells with missing values indicate genes with no/low expression. The Patient vs Normal column illustrates the difference between Z-scores in patient sample and cohort of healthy individuals for each cancer gene. Genes considered to be oncogenes or tumour suppressor genes, according to OncoKB database, and inclusion in various sequencing panels are also indicated. Genes are ordered by decreasing absolute values in the Patient vs Normal column. TSG - tumour suppressor gene; SD - standard deviation
cancer_genes.expr.perc <- exprTable( genes = rownames(ref_genes.list[["genes_cancer"]]), data = data, targets = targets, sampleName = params$sample_name, normal = normal_group , cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], OncoKB_genes = ref_genes.list[["oncokb_genes"]], ext_links = TRUE, type = "perc")
##### Present the expression summary table
cancer_genes.expr.perc[[1]]
Table legend
The RED colour range indicate relatively high expression (percentile) values and BLUE colour range indicate relatively low expression (percentile) values in individual sample group. The BLANK cells with missing values indicate genes with no/low expression. The Patient vs Normal column illustrates the difference between percentiles in patient sample and cohort of healthy individuals for each cancer gene. Genes considered to be oncogenes or tumour suppressor genes, according to OncoKB database, and inclusion in various sequencing panels are also indicated. Genes are ordered by decreasing absolute values in the Patient vs Normal column. TSG - tumour suppressor gene; SD - standard deviation
Expression profiles for the top 10 altered cancer genes with the greatest difference in standarised (Z-score) mRNA expression values between patient’s sample and the average mRNA expression in samples from healthy individuals.
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- cancer_genes.expr.z[[2]]$SYMBOL[1]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are only", length(cancer_genes.expr.z[[2]]$SYMBOL), "altered cancer genes detected!", sep=" "))
}
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- cancer_genes.expr.z[[2]]$SYMBOL[2]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are only", length(cancer_genes.expr.z[[2]]$SYMBOL), "altered cancer genes detected!", sep=" "))
}
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- cancer_genes.expr.z[[2]]$SYMBOL[3]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are only", length(cancer_genes.expr.z[[2]]$SYMBOL), "altered cancer genes detected!", sep=" "))
}
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- cancer_genes.expr.z[[2]]$SYMBOL[4]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are only", length(cancer_genes.expr.z[[2]]$SYMBOL), "altered cancer genes detected!", sep=" "))
}
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- cancer_genes.expr.z[[2]]$SYMBOL[5]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are only", length(cancer_genes.expr.z[[2]]$SYMBOL), "altered cancer genes detected!", sep=" "))
}
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- cancer_genes.expr.z[[2]]$SYMBOL[6]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are only", length(cancer_genes.expr.z[[2]]$SYMBOL), "altered cancer genes detected!", sep=" "))
}
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- cancer_genes.expr.z[[2]]$SYMBOL[7]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are only", length(cancer_genes.expr.z[[2]]$SYMBOL), "altered cancer genes detected!", sep=" "))
}
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- cancer_genes.expr.z[[2]]$SYMBOL[8]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are only", length(cancer_genes.expr.z[[2]]$SYMBOL), "altered cancer genes detected!", sep=" "))
}
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- cancer_genes.expr.z[[2]]$SYMBOL[9]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are only", length(cancer_genes.expr.z[[2]]$SYMBOL), "altered cancer genes detected!", sep=" "))
}
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- cancer_genes.expr.z[[2]]$SYMBOL[10]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are only", length(cancer_genes.expr.z[[2]]$SYMBOL), "altered cancer genes detected!", sep=" "))
}
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
Fusion events prioritisation
The prioritisation of pizzly results is performed in two steps.
NOTE: Only fusion events including genes with available genomics coordinates in Ensembl database version 86 are reported.
##### Remove some columns and rename column names for better presentation
pizzly.fusions.clean <- pizzly.fusions3[ , names(pizzly.fusions3) %!in% c("geneA.id", "geneB.id", "transcripts.list") ]
pizzly.fusions.clean <- pizzly.fusions.clean[ , c("geneA.name", "geneB.name", "paircount", "splitcount", "fusions_abundant", "fusions_cancer", "Reported", "Effector_gene", "Source", "Cancer_acronym") ]
##### Create a nice table output (with dataTable)
names(pizzly.fusions.clean) <- c("Gene A", "Gene B", "Pair count", "Split count", "Abundant transcript(s)", "Cancer gene(s)", "Fusion gene(s)", "Effector gene", "Source", "Cancer acronym")
##### Provide links to the external resources (in the "Source" column)
pizzly.fusions.clean$Source <- gsub( "cgc", paste0("<a href='https://cancer.sanger.ac.uk/cosmic/fusion' target='_blank'>cgc</a>"), pizzly.fusions.clean$Source)
pizzly.fusions.clean$Source <- gsub( "biomarker", paste0("<a href='https://www.cancergenomeinterpreter.org/biomarkers' target='_blank'>biomarker</a>"), pizzly.fusions.clean$Source)
DT::datatable( data = pizzly.fusions.clean, filter="none", rownames = FALSE, extensions = c('Buttons','Scroller'), options = list(pageLength = 10, dom = 'Bfrtip', buttons = c('excel', 'csv', 'pdf','copy','colvis'), scrollX = TRUE, deferRender = TRUE, scrollY = 200, scroller = TRUE), width = 800, escape = FALSE) %>%
DT::formatStyle( columns = names(pizzly.fusions.clean), `font-size` = '12px', 'text-align' = 'center' ) %>%
##### Highlight rows with fusions involving hihgly abundant transcripts (red) or cancer genes (blue)
DT::formatStyle( columns = colnames(pizzly.fusions.clean) %in% "Abundant transcript(s)", backgroundColor = DT::styleEqual(c("-", "Yes"), c('transparent', 'coral')) ) %>%
DT::formatStyle( columns = colnames(pizzly.fusions.clean) %in% "Cancer gene(s)", backgroundColor = DT::styleEqual(c("-", "Yes"), c('transparent', 'lightblue')) )
Table legend
Cell in RED indicate highly abundant fusion transcript(s) and those hihglighted in BLUE indicate fusions containing Cancer genes. Genes known to be involved in gene fusions are flagged based on information provided in Cancer Genome Interpreter (CGI) database.
Abundant transcript(s): gene fusion events involving highly abundant transcript(s)
Cancer gene(s): gene fusion events involving Cancer genes
Fusion gene(s): gene(s) known to be involved in tumorigenesis across cancer types via translocation
Effector gene: gene most likely to promote the tumorigenesis between the two translocation partners
Source: source from which the information has been extracted
Cancer acronym: acronym of cancer type in which the fusion event was detected and reported
AA adrenal adenoma, ABC aneurysmal bone cyst, AC adrenal cortex, ACA adrenal cortex adenoma, ACC adrenal cortex carcinoma, ACML atypical chronic myeloid leukemia, AEL acute erythroid leukemia, ALCL anaplastic large cell lymphoma, ALL acute lymphoblastic leukemia, ALM acral lentigious melanoma, AML acute myeloid lekemia, APML acute promyelocytic leukemia, AS angiosarcoma, B brain, BCC basal cell carcinoma, BCL b cell lymphoma, BLCA bladder , BLTC bladder transitional cell, BLY burkitt lymphoma, BRCA breast , BRCA/OV-PR breast/ovary predisposing, BRCAL breast lobular, BRCALU breast luminal, BT billiary tract, CANCER cancer, CANCER-PR cancer-predisposing, CCS clear cell sarcoma, CER cervix, CESC cervix squamous cell, CH cholangiocarcinoma, CLL chronic lymphocytic leukemia, CM cutaneous melanoma, CM-PR cutaneous melanoma – predisposing, CML chronic myeloid lekemia, CMP chronic myeloproliferative neoplasm, CNL chronic neutrophilic leukemia, CNS central nervous system, CNSLY central nervous system lymphoma, COC colon carcinoma, COC-PR colon carcinoma – predisposing, COCA colon adenocarcinoma, COREAD colorectal adenocarcinoma, COREAD-PR colorectal adenocarcinoma – predisposing, CS chondrosarcoma, CTCL cutaneous T-cell lymphoma, DEST desmoid tumor, DFS dermatofibrosarcoma, DLBCL diffuse large B cell lymphoma, DNT dysembryoplastic neuroephitelial tumor, DSRCT desmoplastic small round cell tumor, ECL eosinophilic chronic leukemia, ED endometrium, EDA endometrial adenocarcinoma, EDS endometrial stromal, ES endocrine system, ESCA esophagous, ESCC esophagous clear cell, ESSC esophagous squamous cell, EWS ewing sarcoma, FGCT female germ cell tumor, FH fibrous histiocytoma, FIBS-PR fibrosarcoma - predisposing, FL follicular lymphoma, G glioma, GB glioblastoma, GBM glioblastoma multiforme, GIST gastrointestinal stromal, GIST-PR gastrointestinal stromal – predisposing, HA hepatic adenoma, HB hepatic blastoma, HC hepatic carcinoma, HC-PR hepatic carcinoma – predisposing, HCL hairy cell leukemia, HEMATO hematological malignancy, HEMATO-PR hematological malignancy – predisposing, HES hyper eosinophilic advanced syndrome, HGP hemangiopericytoma, HISEC erdheim-chester histiocytosis, HISLC largerhans cell histiocytosis, HLY hodgkin lymphoma, HNC head and neck, HNSC head and neck squamous, IM immflamatory myofibroblastic, L lung, LBCL large b-cell lymphoma, LGG lower grade glioma, LGLK large granular leukemia, LIP liposarcoma, LK leukemia, LUAD lung adenocarcinoma, LUSC lung squamous cell, LY luymphoma, MA malignant astrocytoma, MALT malt lymphoma, MAS mastocytosis, MB medulloblastoma, MCL mantle cell lymphoma, MCLK mast cell leukemia, MDPS myelodisplatic proliferative syndrome, MDS myelodisplasicsyndrome, MEN meningioma, MERC merkel cell carcinoma, MESO mesothelioma, MFH myxoidfibrosarcoma, MGCT male germ cell tumor, MKB megakaryoblastic leukemia, MM multiple myeloma, MML myelomonocytic leukemia, MPN malignant peripheral nerve sheat tumor, MRT malignant rhaboid tumor, MRT-PR malignant rhaboid tumor – predisposing, MUCM mucosal melanoma, MY myelofibrosis, MYEP myoepithelioma, MYMA myeloma, MYX myxoma, NB neuroblastoma, NF neurofibromatosis, NHLY non-hodgkin lymphoma, NMC nut midline carcinoma, NSCLC non-small cell lung, NSMGCT non-seminomatous germ cell tumor, NSPH nasopharyngeal, OCSC oral cavity squamous cell, OPH ororpharynx squamous cell, OS osteosarcoma, OV ovary, OVCC ovary clear cell, OVE ovary ephitelial cell, OVG ovary granulosa cell, OVSC ovary small cell, PA pancreas, PAAC pancreas acinar, PAAD pancreas adenocarcinoma, PAIS pancreas islet, PANC pancreas neuroendocrine, PARG paraganglioma, PATH parathyroid adenoma, PCPG pheochromocytoma and paranganglioma, PEC perivascular epithelioid cell, PG pediatric glioma, PHEO pheochromacytoma, PIA pilocytic astrocytoma, PIGA pituitary gland adenoma, PPB pleuropulmonary blastoma, PRAD prostate adenocarcinoma, PTCL perypheral t-cell lymphoma, PV polycitemia vera, R renal, R-PR renal – predisposing, RB retinoblastoma, RCCC renal clear cell, REC rectal carcinoma, RHBDS rhabdomyosarcoma, RPC renal papillary cell, S sarcoma, SCC squamous cell carcinoma, SCC-PR squamous cell carcinoma – predisposing, SCGS sex-cord gonadal stromal, SCHW schwannoma, SCLC small cell lung, SG salivary glands, SGA salivary glands adenoma, SGM salivary glands mucoepidermoid, SIN small intestine, SINNE small intestine neuroendocrine, SK skin, SK-PR skin – predisposing, SM systemic mastocytosis, SOLID solid tumors, SOLID-PR solid tumors - predisposing , ST stomach, ST-PR stomach – predisposing, STAD stomach adenocarcinoma, STS soft tissue sarcoma, SYNS synovial sarcoma, T testis, TCALL t cell acute lymphoblastic leukemia, TCPL t cell prolymphocytic leukemia, TER teratoma, TH thyroid, THA thyroid adenoma, THCA thyroid carcinoma, THF thyroid follicular, THM thyroid medullary, THYM thymic, TOSC tongue squamous cell, UCEC uterine corpus endometroid carcinoma, UCS uterine carcinosarcoma, UTH urothelial, UVM uveal melanoma, WM waldenstrom macroglobullinemia, WT wilms tumor
Gene fusion events involving highly abundant transcript(s) or Cancer genes are presented in the genomic context. The table at the bottom contains genomic coordingates of individual fusion genes sorted based on their genomic location.
##### Keep only fusions with abundant transcript(s) or cancer gene(s) involved
fusion_annot_top <- fusion_annot[ fusion_annot$pizzly.fusions.annot.fusions_abundant == "Yes" | fusion_annot$pizzly.fusions.annot.fusions_cancer == "Yes" , ]
##### Generate bezier curves-like plot representing gene fusion events
fusionsBezierPlot(fusion_annot_top, ref_datasets.list[[tissue]][["gene_annot_all"]])
##### Clean the table for better presentation
fusion_annot_top.clean <- fusion_annot_top[, c("SYMBOL", "SEQNAME", "GENESEQSTART", "GENESEQEND", "SYMBOL.1", "SEQNAME.1", "GENESEQSTART.1", "GENESEQEND.1", "pizzly.fusions.annot.fusions_abundant", "pizzly.fusions.annot.fusions_cancer") ]
##### Order fusions based on the genomic location (chrom and start positions)
chrOrder <-c((1:22),"X","Y","M")
fusion_annot_top.clean$SEQNAME <- factor(fusion_annot_top.clean$SEQNAME, chrOrder, ordered=TRUE)
fusion_annot_top.clean$SEQNAME.1 <- factor(fusion_annot_top.clean$SEQNAME.1, chrOrder, ordered=TRUE)
fusion_annot_top.clean <- fusion_annot_top.clean[do.call(order, fusion_annot_top.clean[, c("SEQNAME", "SEQNAME.1", "GENESEQSTART", "GENESEQSTART.1")]), ]
names(fusion_annot_top.clean) <- c("Gene A", "Chrom (A)", "Start (A)", "End (A)", "Gene B", "Chrom (B)", "Start (B)", "End (B)", "Abundant transcript(s)", "Cancer gene(s)")
DT::datatable( data = fusion_annot_top.clean, filter="none", rownames = FALSE, extensions = c('Buttons','Scroller'), options = list(pageLength = 10, dom = 'Bfrtip', buttons = c('excel', 'csv', 'pdf','copy','colvis'), scrollX = TRUE, deferRender = TRUE, scrollY = 200, scroller = TRUE), width = 800, escape = FALSE) %>%
DT::formatStyle( columns = names(fusion_annot_top.clean), `font-size` = '12px', 'text-align' = 'center' ) %>%
##### Highlight rows with fusions involving hihgly abundant transcripts (red) or cancer genes (blue)
DT::formatStyle( columns = colnames(fusion_annot_top.clean) %in% "Abundant transcript(s)", backgroundColor = DT::styleEqual(c("-", "Yes"), c('transparent', 'coral')) ) %>%
DT::formatStyle( columns = colnames(fusion_annot_top.clean) %in% "Cancer gene(s)", backgroundColor = DT::styleEqual(c("-", "Yes"), c('transparent', 'lightblue')) )
Expression profiles for the top 5 gene fusion events detected in patient’s sample and listed within red (abundant transcripts and blue (cancer genes) sections in the Gene fusion events table).
NOTE: the Fusion events visualisation is not available for gene pairs for which no junctions were found by clinker.
##### Convert the fusion image from pdf to png and present it in the report
geneA <- pizzly.fusions2$geneA.name[1]
geneB <- pizzly.fusions2$geneB.name[1]
##### Check if clinker fusion plot exists. Skip this section in it doesn't
if ( file.exists(paste0(dataDir, "/clinker/", geneA, "_", geneB, ".pdf")) ) {
fusion_png(geneA, geneB, paste(dataDir, "clinker", sep="/") )
}
##### Present a table with number of reads supporting this fusion
##### Check if clinker fusion plot exists. Skip this section in it doesn't
if ( file.exists( paste0(dataDir, "/clinker/", geneA, "_", geneB, ".pdf")) ) {
fusion_result_reads <- result_reads[ result_reads$geneA.name == geneA & result_reads$geneB.name == geneB, ]
if( nrow(fusion_result_reads) !=0 ) {
kable(fusion_result_reads, row.names = FALSE, caption = paste0("Number of reads supporting ", paste(geneA, geneB, sep="-"), " fusion")) %>%
kable_styling(font_size = 12, "striped", "bordered") %>%
scroll_box(width = "100%")
}
}
mRNA expression levels of fusion genes detected in patient’s sample and their average mRNA expression (Z-score) in samples from cancer patients and healthy individuals.
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
targets <- ref_datasets.list[[tissue]][["sample_annot"]]
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
cdfPlot(gene = geneA, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = FALSE, plot_mode = params$plots_mode)
cdfPlot(gene = geneB, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = FALSE, plot_mode = params$plots_mode)
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
fusion.df <- ref_datasets.list[[tissue]][["gene_annot"]]
fusion.df <- rbind(fusion.df[fusion.df$SYMBOL==geneA, ], fusion.df[fusion.df$SYMBOL==geneB, ])
genes = fusion.df$SYMBOL
##### Deal with no genes
if ( length(genes) == 0 ) {
genes <- NULL
}
exprTable( genes = genes, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], OncoKB_genes = ref_genes.list[["oncokb_genes"]][,c("Oncogene", "TSG", "OncoKB")], fusion_genes = fusion_annot_reported, ext_links = TRUE, type = "z")[[1]]
Table legend
The RED colour range indicate relatively high expression (Z-score) values and BLUE colour range indicate relatively low expression (Z-score) values in individual sample group. The Patient vs Normal column illustrates the difference between Z-scores in patient sample and cohort of healthy individuals for each fusion gene. Genes considered to be oncogenes or tumour suppressor genes, according to OncoKB database, are also indicated. Genes are ordered by decreasing absolute values in the Patient vs Normal column. TSG - tumour suppressor gene; SD - standard deviation
exprTable( genes = genes, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], OncoKB_genes = ref_genes.list[["oncokb_genes"]][,c("Oncogene", "TSG", "OncoKB")], fusion_genes = fusion_annot_reported, ext_links = TRUE, type = "perc")[[1]]
Table legend
The RED colour range indicate relatively high expression (percentile) values and BLUE colour range indicate relatively low expression (percentile) values in individual sample group. The Patient vs Normal column illustrates the difference between percentiles in patient sample and cohort of healthy individuals for each fusion gene. Genes considered to be oncogenes or tumour suppressor genes, according to OncoKB database, are also indicated. Genes are ordered by decreasing absolute values in the Patient vs Normal column. TSG - tumour suppressor gene; SD - standard deviation
##### Convert the fusion image from pdf to png and present it in the report
geneA <- pizzly.fusions2$geneA.name[2]
geneB <- pizzly.fusions2$geneB.name[2]
##### Check if clinker fusion plot exists. Skip this section in it doesn't
if ( file.exists( paste0(dataDir, "/clinker/", geneA, "_", geneB, ".pdf")) ) {
fusion_png(geneA, geneB, paste(dataDir, "clinker", sep="/") )
}
##### Present a table with number of reads supporting this fusion
##### Check if clinker fusion plot exists. Skip this section in it doesn't
if ( file.exists( paste0(dataDir, "/clinker/", geneA, "_", geneB, ".pdf")) ) {
fusion_result_reads <- result_reads[ result_reads$geneA.name == geneA & result_reads$geneB.name == geneB, ]
if( nrow(fusion_result_reads) !=0 ) {
kable(fusion_result_reads, row.names = FALSE, caption = paste0("Number of reads supporting ", paste(geneA, geneB, sep="-"), " fusion")) %>%
kable_styling(font_size = 12, "striped", "bordered") %>%
scroll_box(width = "100%")
}
}
mRNA expression levels of fusion genes detected in patient’s sample and their average mRNA expression (Z-score) in samples from cancer patients and healthy individuals.
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
cdfPlot(gene = geneA, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = FALSE, plot_mode = params$plots_mode)
cdfPlot(gene = geneB, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = FALSE, plot_mode = params$plots_mode)
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
fusion.df <- ref_datasets.list[[tissue]][["gene_annot"]]
fusion.df <- rbind(fusion.df[fusion.df$SYMBOL==geneA, ], fusion.df[fusion.df$SYMBOL==geneB, ])
genes = fusion.df$SYMBOL
##### Deal with no genes
if ( length(genes) == 0 ) {
genes <- NULL
}
exprTable( genes = genes, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], OncoKB_genes = ref_genes.list[["oncokb_genes"]][,c("Oncogene", "TSG", "OncoKB")], fusion_genes = fusion_annot_reported, ext_links = TRUE, type = "z")[[1]]
Table legend
The RED colour range indicate relatively high expression (Z-score) values and BLUE colour range indicate relatively low expression (Z-score) values in individual sample group. The Patient vs Normal column illustrates the difference between Z-scores in patient sample and cohort of healthy individuals for each fusion gene. Genes considered to be oncogenes or tumour suppressor genes, according to OncoKB database, are also indicated. Genes are ordered by decreasing absolute values in the Patient vs Normal column. TSG - tumour suppressor gene; SD - standard deviation
exprTable( genes = genes, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], OncoKB_genes = ref_genes.list[["oncokb_genes"]][,c("Oncogene", "TSG", "OncoKB")], fusion_genes = fusion_annot_reported, ext_links = TRUE, type = "perc")[[1]]
Table legend
The RED colour range indicate relatively high expression (percentile) values and BLUE colour range indicate relatively low expression (percentile) values in individual sample group. The Patient vs Normal column illustrates the difference between percentiles in patient sample and cohort of healthy individuals for each fusion gene. Genes considered to be oncogenes or tumour suppressor genes, according to OncoKB database, are also indicated. Genes are ordered by decreasing absolute values in the Patient vs Normal column. TSG - tumour suppressor gene; SD - standard deviation
##### Convert the fusion image from pdf to png and present it in the report
geneA <- pizzly.fusions2$geneA.name[3]
geneB <- pizzly.fusions2$geneB.name[3]
##### Check if clinker fusion plot exists. Skip this section in it doesn't
if ( file.exists( paste0(dataDir, "/clinker/", geneA, "_", geneB, ".pdf")) ) {
fusion_png(geneA, geneB, paste(dataDir, "clinker", sep="/") )
}
##### Present a table with number of reads supporting this fusion
##### Check if clinker fusion plot exists. Skip this section in it doesn't
if ( file.exists( paste0(dataDir, "/clinker/", geneA, "_", geneB, ".pdf")) ) {
fusion_result_reads <- result_reads[ result_reads$geneA.name == geneA & result_reads$geneB.name == geneB, ]
if( nrow(fusion_result_reads) !=0 ) {
kable(fusion_result_reads, row.names = FALSE, caption = paste0("Number of reads supporting ", paste(geneA, geneB, sep="-"), " fusion")) %>%
kable_styling(font_size = 12, "striped", "bordered") %>%
scroll_box(width = "100%")
}
}
mRNA expression levels of fusion genes detected in patient’s sample and their average mRNA expression (Z-score) in samples from cancer patients and healthy individuals.
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
cdfPlot(gene = geneA, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = FALSE, plot_mode = params$plots_mode)
cdfPlot(gene = geneB, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = FALSE, plot_mode = params$plots_mode)
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
fusion.df <- ref_datasets.list[[tissue]][["gene_annot"]]
fusion.df <- rbind(fusion.df[fusion.df$SYMBOL==geneA, ], fusion.df[fusion.df$SYMBOL==geneB, ])
genes = fusion.df$SYMBOL
##### Deal with no genes
if ( length(genes) == 0 ) {
genes <- NULL
}
exprTable( genes = genes, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], OncoKB_genes = ref_genes.list[["oncokb_genes"]][,c("Oncogene", "TSG", "OncoKB")], fusion_genes = fusion_annot_reported, ext_links = TRUE, type = "z")[[1]]
Table legend
The RED colour range indicate relatively high expression (Z-score) values and BLUE colour range indicate relatively low expression (Z-score) values in individual sample group. The Patient vs Normal column illustrates the difference between Z-scores in patient sample and cohort of healthy individuals for each fusion gene. Genes considered to be oncogenes or tumour suppressor genes, according to OncoKB database, are also indicated. Genes are ordered by decreasing absolute values in the Patient vs Normal column. TSG - tumour suppressor gene; SD - standard deviation
exprTable( genes = genes, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], OncoKB_genes = ref_genes.list[["oncokb_genes"]][,c("Oncogene", "TSG", "OncoKB")], fusion_genes = fusion_annot_reported, ext_links = TRUE, type = "perc")[[1]]
Table legend
The RED colour range indicate relatively high expression (percentile) values and BLUE colour range indicate relatively low expression (percentile) values in individual sample group. The Patient vs Normal column illustrates the difference between percentiles in patient sample and cohort of healthy individuals for each fusion gene. Genes considered to be oncogenes or tumour suppressor genes, according to OncoKB database, are also indicated. Genes are ordered by decreasing absolute values in the Patient vs Normal column. TSG - tumour suppressor gene; SD - standard deviation
##### Convert the fusion image from pdf to png and present it in the report
geneA <- pizzly.fusions2$geneA.name[4]
geneB <- pizzly.fusions2$geneB.name[4]
##### Check if clinker fusion plot exists. Skip this section in it doesn't
if ( file.exists(paste0(dataDir, "/clinker/", geneA, "_", geneB, ".pdf")) ) {
fusion_png(geneA, geneB, paste(dataDir, "clinker", sep="/") )
}
##### Present a table with number of reads supporting this fusion
##### Check if clinker fusion plot exists. Skip this section in it doesn't
if ( file.exists( paste0(dataDir, "/clinker/", geneA, "_", geneB, ".pdf")) ) {
fusion_result_reads <- result_reads[ result_reads$geneA.name == geneA & result_reads$geneB.name == geneB, ]
if( nrow(fusion_result_reads) !=0 ) {
kable(fusion_result_reads, row.names = FALSE, caption = paste0("Number of reads supporting ", paste(geneA, geneB, sep="-"), " fusion")) %>%
kable_styling(font_size = 12, "striped", "bordered") %>%
scroll_box(width = "100%")
}
}
mRNA expression levels of fusion genes detected in patient’s sample and their average mRNA expression (Z-score) in samples from cancer patients and healthy individuals.
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
cdfPlot(gene = geneA, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = FALSE, plot_mode = params$plots_mode)
cdfPlot(gene = geneB, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = FALSE, plot_mode = params$plots_mode)
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
fusion.df <- ref_datasets.list[[tissue]][["gene_annot"]]
fusion.df <- rbind(fusion.df[fusion.df$SYMBOL==geneA, ], fusion.df[fusion.df$SYMBOL==geneB, ])
genes = fusion.df$SYMBOL
##### Deal with no genes
if ( length(genes) == 0 ) {
genes <- NULL
}
exprTable( genes = genes, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], OncoKB_genes = ref_genes.list[["oncokb_genes"]][,c("Oncogene", "TSG", "OncoKB")], fusion_genes = fusion_annot_reported, ext_links = TRUE, type = "z")[[1]]
Table legend
The RED colour range indicate relatively high expression (Z-score) values and BLUE colour range indicate relatively low expression (Z-score) values in individual sample group. The Patient vs Normal column illustrates the difference between Z-scores in patient sample and cohort of healthy individuals for each fusion gene. Genes considered to be oncogenes or tumour suppressor genes, according to OncoKB database, are also indicated. Genes are ordered by decreasing absolute values in the Patient vs Normal column. TSG - tumour suppressor gene; SD - standard deviation
exprTable( genes = genes, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], OncoKB_genes = ref_genes.list[["oncokb_genes"]][,c("Oncogene", "TSG", "OncoKB")], fusion_genes = fusion_annot_reported, ext_links = TRUE, type = "perc")[[1]]
Table legend
The RED colour range indicate relatively high expression (percentile) values and BLUE colour range indicate relatively low expression (percentile) values in individual sample group. The Patient vs Normal column illustrates the difference between percentile in patient sample and cohort of healthy individuals for each fusion gene. Genes considered to be oncogenes or tumour suppressor genes, according to OncoKB database, are also indicated. Genes are ordered by decreasing absolute values in the Patient vs Normal column. TSG - tumour suppressor gene; SD - standard deviation
##### Convert the fusion image from pdf to png and present it in the report
geneA <- pizzly.fusions2$geneA.name[5]
geneB <- pizzly.fusions2$geneB.name[5]
##### Check if clinker fusion plot exists. Skip this section in it doesn't
if ( file.exists(paste0(dataDir, "/clinker/", geneA, "_", geneB, ".pdf")) ) {
fusion_png(geneA, geneB, paste(dataDir, "clinker", sep="/") )
}
##### Present a table with number of reads supporting this fusion
##### Check if clinker fusion plot exists. Skip this section in it doesn't
if ( file.exists( paste0(dataDir, "/clinker/", geneA, "_", geneB, ".pdf")) ) {
fusion_result_reads <- result_reads[ result_reads$geneA.name == geneA & result_reads$geneB.name == geneB, ]
if( nrow(fusion_result_reads) !=0 ) {
kable(fusion_result_reads, row.names = FALSE, caption = paste0("Number of reads supporting ", paste(geneA, geneB, sep="-"), " fusion")) %>%
kable_styling(font_size = 12, "striped", "bordered") %>%
scroll_box(width = "100%")
}
}
mRNA expression levels of fusion genes detected in patient’s sample and their average mRNA expression (Z-score) in samples from cancer patients and healthy individuals.
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
cdfPlot(gene = geneA, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = FALSE, plot_mode = params$plots_mode)
cdfPlot(gene = geneB, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = FALSE, plot_mode = params$plots_mode)
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
fusion.df <- ref_datasets.list[[tissue]][["gene_annot"]]
fusion.df <- rbind(fusion.df[fusion.df$SYMBOL==geneA, ], fusion.df[fusion.df$SYMBOL==geneB, ])
genes = fusion.df$SYMBOL
##### Deal with no genes
if ( length(genes) == 0 ) {
genes <- NULL
}
exprTable( genes = genes, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], OncoKB_genes = ref_genes.list[["oncokb_genes"]][,c("Oncogene", "TSG", "OncoKB")], fusion_genes = fusion_annot_reported, ext_links = TRUE, type = "z")[[1]]
Table legend
The RED colour range indicate relatively high expression (Z-score) values and BLUE colour range indicate relatively low expression (Z-score) values in individual sample group. The Patient vs Normal column illustrates the difference between Z-scores in patient sample and cohort of healthy individuals for each fusion gene. Genes considered to be oncogenes or tumour suppressor genes, according to OncoKB database, are also indicated. Genes are ordered by decreasing absolute values in the Patient vs Normal column. TSG - tumour suppressor gene; SD - standard deviation
exprTable( genes = genes, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], OncoKB_genes = ref_genes.list[["oncokb_genes"]][,c("Oncogene", "TSG", "OncoKB")], fusion_genes = fusion_annot_reported, ext_links = TRUE, type = "perc")[[1]]
Table legend
The RED colour range indicate relatively high expression (percentile) values and BLUE colour range indicate relatively low expression (percentile) values in individual sample group. The Patient vs Normal column illustrates the difference between percentiles in patient sample and cohort of healthy individuals for each fusion gene. Genes considered to be oncogenes or tumour suppressor genes, according to OncoKB database, are also indicated. Genes are ordered by decreasing absolute values in the Patient vs Normal column. TSG - tumour suppressor gene; SD - standard deviation
Section overlaying the mRNA expression data with per-gene somatic copy-number (CN) data (from PURPLE) and mutation status, if available.
PURPLE report for this sample IS AVAILABLE.
Scatterplot comparing the per-gene mRNA expression values (y-axis, Z-scores), CN values (x-axis, from PURPLE). If the mutation status information is available then the genes’s colours correspond to the variant(s) consequence (from PCGR). Genes with CN values > 10 or < 0.5 are annotated. NOTE: only Cancer genes and Mutated genes with tier 1-3 variants (if mutation data is available) are presented.
##### Generate scatterplot with per-gene expression values (y-axis), CN values (x-axis) and mutation status info (colours)
suppressMessages(library(plotly))
data <- ref_datasets.list[[tissue]][["expr_mut_cn_data"]]
##### Limit the data to include only cancer genes
genes <- as.vector(data$Gene[ data$Gene %in% rownames(ref_genes.list[["genes_cancer"]]) ])
if ( !is.null(ref_genes.list[["pcgr"]]) ) {
##### Add mutated genes with vatiants tier 1-3
genes <- c( genes, unique(ref_genes.list[["pcgr"]][ ref_genes.list[["pcgr"]]$TIER %in% c("1", "2", "3" ), ]$SYMBOL))
data <- data[ data$Gene %in% genes, ]
mutCNexprPlot(data = data, mut_data = TRUE, cn_bottom = 0.5, cn_top = 3, plot_mode = params$plots_mode)
} else {
data <- data[ data$Gene %in% genes, ]
mutCNexprPlot(data = data, mut_data = FALSE, cn_bottom = 0.5, cn_top = 3, plot_mode = params$plots_mode)
}
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
Out of the 836 genes within gained (CN values > 4) or lossed (CN values < 1) regions the expression of 836 was reliably measured in patient’s sample. The remaining 0 genes are either not expressed or their expression level is too low to be detected (indicated in BLANK cells with missing values).
Table summarising the mRNA expression values in normal, cancer and patient samples for genes with CN values > 4 (gains), based on patient’s genomic data (from PURPLE), and mutation status if available (from PCGR).
##### Generate expression summary table for per-gene expression values CN values and mutation status info (colours)
##### Keep only genes within CN gains
cn_data <- ref_datasets.list[[tissue]][["expr_mut_cn_data"]]
cn_data <- cn_data[ cn_data$CN > params$cn_gain, ]
cn_data <- cn_data[, "CN", drop=FALSE]
genes = rownames(cn_data)
##### Deal with no genes
if ( length(genes) == 0 ) {
genes <- NULL
}
##### Get expression data
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
if ( !is.null(ref_genes.list[["pcgr"]]) && !is.null(ref_genes.list[["purple"]]) ) {
mut_cn_expr_genes.expr.gains.z <- exprTable( genes = genes, data = data, cn_data = cn_data, cn_decrease = TRUE, targets = targets, sampleName = params$sample_name, normal = normal_group , cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], mut_annot = ref_genes.list[["pcgr"]][, c("SYMBOL", "TIER", "CONSEQUENCE", "VARIANT_CLASS", "AF_TUMOR", "GENOMIC_CHANGE", "PROTEIN_CHANGE")], type = "z")
##### Generate expression summary table for per-gene expression values and CN values
} else if ( !is.null(ref_genes.list[["purple"]] ) ) {
mut_cn_expr_genes.expr.gains.z <- exprTable( genes = genes, data = data, cn_data = cn_data, cn_decrease = TRUE, targets = targets, sampleName = params$sample_name, normal = normal_group , cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], type = "z")
}
##### Present the expression, CN and mutation data summary table
mut_cn_expr_genes.expr.gains.z[[1]]
Table legend
The RED colour range indicate relatively high expression (Z-score) values and BLUE colour range indicate relatively low expression (Z-score) values in individual sample group. The BLANK cells with missing values indicate genes with no/low expression. The Patient vs Normal column illustrates the difference between Z-scores in patient sample and cohort of healthy individuals for each cancer gene. The CN values based on patient’s genomic data are presented in Patient (CN) column with a horizontal blue bar indicating the CN value of each gene in the context of other genes. If mutation data is availbale, then the variants’ tier, consequence, class and tumour allele freuqnecy (AF), as well as genomic and protein change are also provided on right-hand side based on information from PCGR report (similar to Mutated genes section).Genes are ordered by Patient (CN) column. SD - standard deviation; CN - copy-number
##### Generate expression summary table for per-gene expression values CN values and mutation status info (colours)
if ( !is.null(ref_genes.list[["pcgr"]]) && !is.null(ref_genes.list[["purple"]]) ) {
mut_cn_expr_genes.expr.gains.perc <- exprTable( genes = genes, data = data, cn_data = cn_data, cn_decrease = TRUE, targets = targets, sampleName = params$sample_name, normal = normal_group , cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], mut_annot = ref_genes.list[["pcgr"]][, c("SYMBOL", "TIER", "CONSEQUENCE", "VARIANT_CLASS", "AF_TUMOR", "GENOMIC_CHANGE", "PROTEIN_CHANGE")], type = "perc")
##### Generate expression summary table for per-gene expression values and CN values
} else if ( !is.null(ref_genes.list[["purple"]] ) ) {
mut_cn_expr_genes.expr.gains.perc <- exprTable( genes = genes, data = data, cn_data = cn_data, cn_decrease = TRUE, targets = targets, sampleName = params$sample_name, normal = normal_group , cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], type = "perc")
}
##### Present the expression, CN and mutation data summary table
mut_cn_expr_genes.expr.gains.perc[[1]]
Table legend
The RED colour range indicate relatively high expression (percentile) values and BLUE colour range indicate relatively low expression (percentile) values in individual sample group. The BLANK cells with missing values indicate genes with no/low expression. The Patient vs Normal column illustrates the difference between percentiles in patient sample and cohort of healthy individuals for each cancer gene. The CN values based on patient’s genomic data are presented in Patient (CN) column with a horizontal blue bar indicating the CN value of each gene in the context of other genes. If mutation data is availbale, then the variants’ tier, consequence, class and tumour allele freuqnecy (AF), as well as genomic and protein change are also provided on right-hand side based on information from PCGR report (similar to Mutated genes section). Genes are ordered by Patient (CN) column. SD - standard deviation; CN - copy-number
Table summarising the mRNA expression values in normal, cancer and patient samples for genes with CN values < 1 (losses), based on patient’s genomic data (from PURPLE), and mutation status if available (from PCGR).
##### Generate expression summary table for per-gene expression values CN values and mutation status info (colours)
##### Keep only genes within CN losses
cn_data <- ref_datasets.list[[tissue]][["expr_mut_cn_data"]]
cn_data <- cn_data[ cn_data$CN < params$cn_loss, ]
cn_data <- cn_data[, "CN", drop=FALSE]
genes = rownames(cn_data)
##### Deal with no genes
if ( length(genes) == 0 ) {
genes <- NULL
}
##### Get expression data
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
if ( !is.null(ref_genes.list[["pcgr"]]) && !is.null(ref_genes.list[["purple"]]) ) {
mut_cn_expr_genes.expr.losses.z <- exprTable( genes = genes, data = data, cn_data = cn_data, cn_decrease = FALSE, targets = targets, sampleName = params$sample_name, normal = normal_group , cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], mut_annot = ref_genes.list[["pcgr"]][, c("SYMBOL", "TIER", "CONSEQUENCE", "VARIANT_CLASS", "AF_TUMOR", "GENOMIC_CHANGE", "PROTEIN_CHANGE")], type = "z")
##### Generate expression summary table for per-gene expression values and CN values
} else if ( !is.null(ref_genes.list[["purple"]] ) ) {
mut_cn_expr_genes.expr.losses.z <- exprTable( genes = genes, data = data, cn_data = cn_data, cn_decrease = FALSE, targets = targets, sampleName = params$sample_name, normal = normal_group , cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], type = "z")
}
##### Present the expression, CN and mutation data summary table
mut_cn_expr_genes.expr.losses.z[[1]]
Table legend
The RED colour range indicate relatively high expression (Z-score) values and BLUE colour range indicate relatively low expression (Z-score) values in individual sample group. The BLANK cells with missing values indicate genes with no/low expression. The Patient vs Normal column illustrates the difference between Z-scores in patient sample and cohort of healthy individuals for each cancer gene. The CN values based on patient’s genomic data are presented in Patient (CN) column with a horizontal blue bar indicating the CN value of each gene in the context of other genes. If mutation data is availbale, then the variants’ tier, consequence, class and tumour allele freuqnecy (AF), as well as genomic and protein change are also provided on right-hand side based on information from PCGR report (similar to Mutated genes section).Genes are ordered by Patient (CN) column. SD - standard deviation; CN - copy-number
##### Generate expression summary table for per-gene expression values CN values and mutation status info (colours)
if ( !is.null(ref_genes.list[["pcgr"]]) && !is.null(ref_genes.list[["purple"]]) ) {
mut_cn_expr_genes.expr.losses.perc <- exprTable( genes = genes, data = data, cn_data = cn_data, cn_decrease = FALSE, targets = targets, sampleName = params$sample_name, normal = normal_group , cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], mut_annot = ref_genes.list[["pcgr"]][, c("SYMBOL", "TIER", "CONSEQUENCE", "VARIANT_CLASS", "AF_TUMOR", "GENOMIC_CHANGE", "PROTEIN_CHANGE")], type = "perc")
##### Generate expression summary table for per-gene expression values and CN values
} else if ( !is.null(ref_genes.list[["purple"]] ) ) {
mut_cn_expr_genes.expr.losses.perc <- exprTable( genes = genes, data = data, cn_data = cn_data, cn_decrease = FALSE, targets = targets, sampleName = params$sample_name, normal = normal_group , cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], type = "perc")
}
##### Present the expression, CN and mutation data summary table
mut_cn_expr_genes.expr.losses.perc[[1]]
Table legend
The RED colour range indicate relatively high expression (percentile) values and BLUE colour range indicate relatively low expression (percentile) values in individual sample group. The BLANK cells with missing values indicate genes with no/low expression. The Patient vs Normal column illustrates the difference between percentiles in patient sample and cohort of healthy individuals for each cancer gene. The CN values based on patient’s genomic data are presented in Patient (CN) column with a horizontal blue bar indicating the CN value of each gene in the context of other genes. If mutation data is availbale, then the variants’ tier, consequence, class and tumour allele freuqnecy (AF), as well as genomic and protein change are also provided on right-hand side based on information from PCGR report (similar to Mutated genes section). Genes are ordered by Patient (CN) column. SD - standard deviation; CN - copy-number
Expression profiles for 5 genes with the highest (gains) and 5 genes with the lowest (losses) CN values and the greatest difference in standarised (Z-score) mRNA expression values between patient’s sample and the average mRNA expression in samples from healthy individuals.
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_cn_expr_genes.expr.gains.z[[2]]$SYMBOL[1]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are only", length(mut_cn_expr_genes.expr.gains.z[[2]]$SYMBOL), "genes mapped to detected gained regions!", sep=" "))
}
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_cn_expr_genes.expr.gains.z[[2]]$SYMBOL[2]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are only", length(mut_cn_expr_genes.expr.gains.z[[2]]$SYMBOL), "genes mapped to detected gained regions!", sep=" "))
}
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_cn_expr_genes.expr.gains.z[[2]]$SYMBOL[3]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are only", length(mut_cn_expr_genes.expr.gains.z[[2]]$SYMBOL), "genes mapped to detected gained regions!", sep=" "))
}
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_cn_expr_genes.expr.gains.z[[2]]$SYMBOL[4]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are only", length(mut_cn_expr_genes.expr.gains.z[[2]]$SYMBOL), "genes mapped to detected gained regions!", sep=" "))
}
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_cn_expr_genes.expr.gains.z[[2]]$SYMBOL[5]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are only", length(mut_cn_expr_genes.expr.gains.z[[2]]$SYMBOL), "genes mapped to detected gained regions!", sep=" "))
}
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_cn_expr_genes.expr.losses.z[[2]]$SYMBOL[1]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are only", length(mut_cn_expr_genes.expr.losses.z[[2]]$SYMBOL), "genes mapped to detected lossed regions!", sep=" "))
}
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_cn_expr_genes.expr.losses.z[[2]]$SYMBOL[2]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are only", length(mut_cn_expr_genes.expr.losses.z[[2]]$SYMBOL), "genes mapped to detected lossed regions!", sep=" "))
}
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_cn_expr_genes.expr.losses.z[[2]]$SYMBOL[3]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are only", length(mut_cn_expr_genes.expr.losses.z[[2]]$SYMBOL), "genes mapped to detected lossed regions!", sep=" "))
}
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_cn_expr_genes.expr.losses.z[[2]]$SYMBOL[4]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are only", length(mut_cn_expr_genes.expr.losses.z[[2]]$SYMBOL), "genes mapped to detected lossed regions!", sep=" "))
}
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_cn_expr_genes.expr.losses.z[[2]]$SYMBOL[5]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are only", length(mut_cn_expr_genes.expr.losses.z[[2]]$SYMBOL), "genes mapped to detected lossed regions!", sep=" "))
}
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
Section presenting expression levels for immune response markers to assess pre-existing anti-cancer immunity and likelihood of response to immunotherapy)
Out of the 53 immune response markers the expression of 51 was reliably measured in patient’s sample. The remaining 2 genes are either not expressed or their expression level is too low to be detected (indicated in BLANK cells with missing values).
##### Generate expression summary table for cancer genes from OncoKB and PMCC
targets <- ref_datasets.list[[tissue]][["sample_annot"]]
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
genes <- unique(unlist(ref_genes.list[["genes_immune"]]))
##### Deal with no genes
if ( length(genes) == 0 ) {
genes <- NULL
}
immune_genes.expr.z <- exprTable( genes = genes, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group , cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], ext_links = TRUE, type = "z")
##### Present the expression summary table
immune_genes.expr.z[[1]]
Table legend
The RED colour range indicate relatively high expression (Z-score) values and BLUE colour range indicate relatively low expression (Z-score) values in individual sample group. The BLANK cells with missing values indicate genes with no/low expression. The Patient vs Normal column illustrates the difference between Z-scores in patient sample and cohort of healthy individuals for each cancer gene. Genes are ordered by decreasing absolute values in the Patient vs Normal column. SD - standard deviation
##### Generate expression summary table for cancer genes from OncoKB and PMCC
immune_genes.expr.perc <- exprTable( genes = genes, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group , cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], ext_links = TRUE, type = "perc")
##### Present the expression summary table
immune_genes.expr.perc[[1]]
Table legend
The RED colour range indicate relatively high expression (percentile) values and BLUE colour range indicate relatively low expression (percentile) values in individual sample group. The BLANK cells with missing values indicate genes with no/low expression. The Patient vs Normal column illustrates the difference between percentiles in patient sample and cohort of healthy individuals for each cancer gene. Genes are ordered by decreasing absolute values in the Patient vs Normal column. TSG - tumour suppressor gene; SD - standard deviation
Expression profiles for the top 10 immune response markers with the greatest difference in standarised (Z-score) mRNA expression values between patient’s sample and the average mRNA expression in samples from healthy individuals.
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- immune_genes.expr.z[[2]]$SYMBOL[1]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are only", length(immune_genes.expr.z[[2]]$SYMBOL), "immune response marker(s) detected!", sep=" "))
}
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- immune_genes.expr.z[[2]]$SYMBOL[2]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are only", length(immune_genes.expr.z[[2]]$SYMBOL), "immune response marker(s) detected!", sep=" "))
}
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- immune_genes.expr.z[[2]]$SYMBOL[3]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are only", length(immune_genes.expr.z[[2]]$SYMBOL), "immune response marker(s) detected!", sep=" "))
}
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- immune_genes.expr.z[[2]]$SYMBOL[4]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are only", length(immune_genes.expr.z[[2]]$SYMBOL), "immune response marker(s) detected!", sep=" "))
}
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- immune_genes.expr.z[[2]]$SYMBOL[5]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are only", length(immune_genes.expr.z[[2]]$SYMBOL), "immune response marker(s) detected!", sep=" "))
}
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- immune_genes.expr.z[[2]]$SYMBOL[6]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are only", length(immune_genes.expr.z[[2]]$SYMBOL), "immune response marker(s) detected!", sep=" "))
}
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- immune_genes.expr.z[[2]]$SYMBOL[7]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are only", length(immune_genes.expr.z[[2]]$SYMBOL), "immune response marker(s) detected!", sep=" "))
}
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- immune_genes.expr.z[[2]]$SYMBOL[8]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are only", length(immune_genes.expr.z[[2]]$SYMBOL), "immune response marker(s) detected!", sep=" "))
}
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- immune_genes.expr.z[[2]]$SYMBOL[9]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are only", length(immune_genes.expr.z[[2]]$SYMBOL), "immune response marker(s) detected!", sep=" "))
}
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- immune_genes.expr.z[[2]]$SYMBOL[10]
##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) && gene %in% rownames(data) ) {
cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)
} else {
cat(paste("There are only", length(immune_genes.expr.z[[2]]$SYMBOL), "immune response marker(s) detected!", sep=" "))
}
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)
List of suitable drugs based on actionable targets identified among Mutated genes, dysregulated Cancer genes, Gene fusion events and Copy-number altered genes, which can be considered in the treatment decision making process. The clinically actionable aberrations are matched based on information provided by clinical interpretations of variants in Cancer (CIViC) (Griffith et al. (2017)).
0 genes with PCGR tier 1-3 variants were screened for suitable drugs (see Mutated genes section).
Evidence pertaining to a variant’s effect on therapeutic response.
##### Generate table with drugs targeting mutated cancer genes
genes <- mut_genes.expr.z[[2]]$SYMBOL
civicDrugTable(genes, civic_var_summaries = caner_genes_annot.list[["civic_var_summaries"]], civic_clin_evid = caner_genes_annot.list[["civic_clin_evid"]], evid_type = "Predictive", var_type = "mutation")[[1]]
Table legend
Evidence Level
Evidence pertaining to a variant’s impact on disease progression, severity, or patient survival.
##### Generate table with drugs targeting mutated cancer genes
civicDrugTable(genes, civic_var_summaries = caner_genes_annot.list[["civic_var_summaries"]], civic_clin_evid = caner_genes_annot.list[["civic_clin_evid"]], evid_type = "Prognostic", var_type = "mutation")[[1]]
Table legend
Evidence Level
Evidence pertaining to a variant’s impact on patient diagnosis.
##### Generate table with drugs targeting mutated cancer genes
civicDrugTable(genes, civic_var_summaries = caner_genes_annot.list[["civic_var_summaries"]], civic_clin_evid = caner_genes_annot.list[["civic_clin_evid"]], evid_type = "Diagnostic", var_type = "mutation")[[1]]
Table legend
Evidence Level
Evidence pertains to a variant’s role in conferring susceptibility to a disease.
##### Generate table with drugs targeting mutated cancer genes
civicDrugTable(genes, civic_var_summaries = caner_genes_annot.list[["civic_var_summaries"]], civic_clin_evid = caner_genes_annot.list[["civic_clin_evid"]], evid_type = "Predisposing", var_type = "mutation")[[1]]
Table legend
Evidence Level
The top 50 cancer genes with the greatest difference in standarised (Z-score) mRNA expression values between patient’s sample and the average mRNA expression in samples from healthy individuals were screened for suitable drugs (see Cancer genes section).
Evidence pertaining to a variant’s effect on therapeutic response.
##### Generate table with drugs targeting dysregulated cancer genes
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
genes <- cancer_genes.expr.z[[2]]$SYMBOL[1:50]
civicDrugTable(genes, civic_var_summaries = caner_genes_annot.list[["civic_var_summaries"]], civic_clin_evid = caner_genes_annot.list[["civic_clin_evid"]], evid_type = "Predictive", var_type = "expression")[[1]]
Table legend
Evidence Level
Evidence pertaining to a variant’s impact on disease progression, severity, or patient survival.
##### Generate table with drugs targeting dysregulated cancer genes
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
genes <- cancer_genes.expr.z[[2]]$SYMBOL[1:50]
civicDrugTable(genes, civic_var_summaries = caner_genes_annot.list[["civic_var_summaries"]], civic_clin_evid = caner_genes_annot.list[["civic_clin_evid"]], evid_type = "Prognostic", var_type = "expression")[[1]]
Table legend
Evidence Level
Evidence pertaining to a variant’s impact on patient diagnosis.
##### Generate table with drugs targeting dysregulated cancer genes
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
genes <- cancer_genes.expr.z[[2]]$SYMBOL[1:50]
civicDrugTable(genes, civic_var_summaries = caner_genes_annot.list[["civic_var_summaries"]], civic_clin_evid = caner_genes_annot.list[["civic_clin_evid"]], evid_type = "Diagnostic", var_type = "expression")[[1]]
Table legend
Evidence Level
Evidence pertains to a variant’s role in conferring susceptibility to a disease.
##### Generate table with drugs targeting dysregulated cancer genes
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
genes <- cancer_genes.expr.z[[2]]$SYMBOL[1:50]
civicDrugTable(genes, civic_var_summaries = caner_genes_annot.list[["civic_var_summaries"]], civic_clin_evid = caner_genes_annot.list[["civic_clin_evid"]], evid_type = "Predisposing", var_type = "expression")[[1]]
Table legend
Evidence Level
128 gene fusion events involving highly abundant transcripts and/or cancer genes (red and blue sections in Gene fusion events table) were screened for suitable drugs.
Evidence pertaining to a variant’s effect on therapeutic response.
##### Generate table with drugs targeting fusion genes
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
genesA <- levels(deduped.result$geneA.name)[deduped.result$geneA.name]
genesB <- levels(deduped.result$geneB.name)[deduped.result$geneB.name]
civicDrugTable(c(genesA, genesB), civic_var_summaries = caner_genes_annot.list[["civic_var_summaries"]], civic_clin_evid = caner_genes_annot.list[["civic_clin_evid"]], evid_type = "Predictive", var_type = "fusion")[[1]]
Table legend
Evidence Level
Evidence pertaining to a variant’s impact on disease progression, severity, or patient survival.
##### Generate table with drugs targeting fusion genes
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
genesA <- levels(deduped.result$geneA.name)[deduped.result$geneA.name]
genesB <- levels(deduped.result$geneB.name)[deduped.result$geneB.name]
civicDrugTable(c(genesA, genesB), civic_var_summaries = caner_genes_annot.list[["civic_var_summaries"]], civic_clin_evid = caner_genes_annot.list[["civic_clin_evid"]], evid_type = "Prognostic", var_type = "fusion")[[1]]
Table legend
Evidence Level
Evidence pertaining to a variant’s impact on patient diagnosis.
##### Generate table with drugs targeting fusion genes
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
genesA <- levels(deduped.result$geneA.name)[deduped.result$geneA.name]
genesB <- levels(deduped.result$geneB.name)[deduped.result$geneB.name]
civicDrugTable(c(genesA, genesB), civic_var_summaries = caner_genes_annot.list[["civic_var_summaries"]], civic_clin_evid = caner_genes_annot.list[["civic_clin_evid"]], evid_type = "Diagnostic", var_type = "fusion")[[1]]
Table legend
Evidence Level
Evidence pertains to a variant’s role in conferring susceptibility to a disease.
##### Generate table with drugs targeting fusion genes
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
genesA <- levels(deduped.result$geneA.name)[deduped.result$geneA.name]
genesB <- levels(deduped.result$geneB.name)[deduped.result$geneB.name]
civicDrugTable(c(genesA, genesB), civic_var_summaries = caner_genes_annot.list[["civic_var_summaries"]], civic_clin_evid = caner_genes_annot.list[["civic_clin_evid"]], evid_type = "Predisposing", var_type = "fusion")[[1]]
Table legend
Evidence Level
Evidence pertaining to a variant’s effect on therapeutic response.
##### Generate table with drugs targeting fusion genes
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
genesA <- levels(result.cancer_genes$geneA.name)[result.cancer_genes$geneA.name]
genesB <- levels(result.cancer_genes$geneB.name)[result.cancer_genes$geneB.name]
civicDrugTable(c(genesA, genesB), civic_var_summaries = caner_genes_annot.list[["civic_var_summaries"]], civic_clin_evid = caner_genes_annot.list[["civic_clin_evid"]], evid_type = "Predictive", var_type = "fusion")[[1]]
Table legend
Evidence Level
Evidence pertaining to a variant’s impact on disease progression, severity, or patient survival.
##### Generate table with drugs targeting fusion genes
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
genesA <- levels(result.cancer_genes$geneA.name)[result.cancer_genes$geneA.name]
genesB <- levels(result.cancer_genes$geneB.name)[result.cancer_genes$geneB.name]
civicDrugTable(c(genesA, genesB), civic_var_summaries = caner_genes_annot.list[["civic_var_summaries"]], civic_clin_evid = caner_genes_annot.list[["civic_clin_evid"]], evid_type = "Prognostic", var_type = "fusion")[[1]]
Table legend
Evidence Level
Evidence pertaining to a variant’s impact on patient diagnosis.
##### Generate table with drugs targeting fusion genes
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
genesA <- levels(result.cancer_genes$geneA.name)[result.cancer_genes$geneA.name]
genesB <- levels(result.cancer_genes$geneB.name)[result.cancer_genes$geneB.name]
civicDrugTable(c(genesA, genesB), civic_var_summaries = caner_genes_annot.list[["civic_var_summaries"]], civic_clin_evid = caner_genes_annot.list[["civic_clin_evid"]], evid_type = "Diagnostic", var_type = "fusion")[[1]]
Table legend
Evidence Level
Evidence pertains to a variant’s role in conferring susceptibility to a disease.
##### Generate table with drugs targeting fusion genes
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
genesA <- levels(result.cancer_genes$geneA.name)[result.cancer_genes$geneA.name]
genesB <- levels(result.cancer_genes$geneB.name)[result.cancer_genes$geneB.name]
civicDrugTable(c(genesA, genesB), civic_var_summaries = caner_genes_annot.list[["civic_var_summaries"]], civic_clin_evid = caner_genes_annot.list[["civic_clin_evid"]], evid_type = "Predisposing", var_type = "fusion")[[1]]
Table legend
Evidence Level
836 genes with CN values > 4 (gains) and or < 1 (losses) were screened for suitable drugs (see Copy-number altered genes section).
Evidence pertaining to a variant’s effect on therapeutic response.
##### Generate table with drugs targeting CN altered genes
genes <- mut_cn_expr_genes.expr.gains.z[[2]]$SYMBOL
civicDrugTable(genes, civic_var_summaries = caner_genes_annot.list[["civic_var_summaries"]], civic_clin_evid = caner_genes_annot.list[["civic_clin_evid"]], evid_type = "Predictive", var_type = "copy_gain")[[1]]
Table legend
Evidence Level
Evidence pertaining to a variant’s impact on disease progression, severity, or patient survival.
##### Generate table with drugs targeting CN altered genes
civicDrugTable(genes, civic_var_summaries = caner_genes_annot.list[["civic_var_summaries"]], civic_clin_evid = caner_genes_annot.list[["civic_clin_evid"]], evid_type = "Prognostic", var_type = "copy_gain")[[1]]
Table legend
Evidence Level
Evidence pertaining to a variant’s impact on patient diagnosis.
##### Generate table with drugs targeting CN altered genes
civicDrugTable(genes, civic_var_summaries = caner_genes_annot.list[["civic_var_summaries"]], civic_clin_evid = caner_genes_annot.list[["civic_clin_evid"]], evid_type = "Diagnostic", var_type = "copy_gain")[[1]]
Table legend
Evidence Level
Evidence pertains to a variant’s role in conferring susceptibility to a disease.
##### Generate table with drugs targeting CN altered genes
civicDrugTable(genes, civic_var_summaries = caner_genes_annot.list[["civic_var_summaries"]], civic_clin_evid = caner_genes_annot.list[["civic_clin_evid"]], evid_type = "Predisposing", var_type = "copy_gain")[[1]]
Table legend
Evidence Level
Evidence pertaining to a variant’s effect on therapeutic response.
##### Generate table with drugs targeting CN altered genes
genes <- mut_cn_expr_genes.expr.losses.z[[2]]$SYMBOL
civicDrugTable(genes, civic_var_summaries = caner_genes_annot.list[["civic_var_summaries"]], civic_clin_evid = caner_genes_annot.list[["civic_clin_evid"]], evid_type = "Predictive", var_type = "copy_loss")[[1]]
Table legend
Evidence Level
Evidence pertaining to a variant’s impact on disease progression, severity, or patient survival.
##### Generate table with drugs targeting CN altered genes
civicDrugTable(genes, civic_var_summaries = caner_genes_annot.list[["civic_var_summaries"]], civic_clin_evid = caner_genes_annot.list[["civic_clin_evid"]], evid_type = "Prognostic", var_type = "copy_loss")[[1]]
Table legend
Evidence Level
Evidence pertaining to a variant’s impact on patient diagnosis.
##### Generate table with drugs targeting CN altered genes
civicDrugTable(genes, civic_var_summaries = caner_genes_annot.list[["civic_var_summaries"]], civic_clin_evid = caner_genes_annot.list[["civic_clin_evid"]], evid_type = "Diagnostic", var_type = "copy_loss")[[1]]
Table legend
Evidence Level
Evidence pertains to a variant’s role in conferring susceptibility to a disease.
##### Generate table with drugs targeting CN altered genes
civicDrugTable(genes, civic_var_summaries = caner_genes_annot.list[["civic_var_summaries"]], civic_clin_evid = caner_genes_annot.list[["civic_clin_evid"]], evid_type = "Predisposing", var_type = "copy_loss")[[1]]
Table legend
Evidence Level
Parameters
for ( i in 1:length(params) ) {
cat(paste("Parameter: ", names(params)[i], "\nValue: ", paste(unlist(params[i]), collapse = ","), "\n\n", sep=""))
}
Parameter: ref_data_dir
Value: ../data
Parameter: genes_cancer
Value: genes/cancer_genes.20181112.genes
Parameter: genes_immune
Value: genes/Genes_immune.txt
Parameter: oncokb_genes
Value: OncoKB/CancerGenesList.txt
Parameter: oncokb_clin_vars
Value: OncoKB/allActionableVariants.txt
Parameter: oncokb_all_vars
Value: OncoKB/allAnnotatedVariants.txt
Parameter: civic_var_summaries
Value: CIViC/01-Oct-2018-VariantSummaries.tsv
Parameter: civic_clin_evid
Value: CIViC/01-Oct-2018-ClinicalEvidenceSummaries.tsv
Parameter: cancer_biomarkers_trans
Value: cancer_biomarkers_database/cancer_genes_upon_trans.tsv
Parameter: cn_loss
Value: 1
Parameter: cn_gain
Value: 4
Parameter: ensembl_version
Value: 86
Parameter: ucsc_genome_assembly
Value: 19
Parameter: report_dir
Value: /Users/jmarzec/data/Patients/final/CCR180038_SV18T002P006_RNA/RNAseq_report
Parameter: sample_name
Value: CCR180038_SV18T002P006_RNA
Parameter: tissue
Value: pancreas
Parameter: plots_mode
Value: static
Parameter: count_file
Value: /Users/jmarzec/data/Patients/final/CCR180038_SV18T002P006_RNA/CCR180038_SV18T002P006_RNA-ready.counts
Parameter: batch
Value: /Users/jmarzec/data/Patients/umccrised/2016_249_18_SV_P006_1__CCR180038_SV18T002P006
Session info
devtools::session_info()
─ Session info ──────────────────────────────────────────────────────────
setting value
version R version 3.5.0 (2018-04-23)
os macOS 10.14.2
system x86_64, darwin15.6.0
ui X11
language (EN)
collate en_AU.UTF-8
ctype en_AU.UTF-8
tz Australia/Melbourne
date 2019-01-18
─ Packages ──────────────────────────────────────────────────────────────
package * version date lib
AnnotationDbi * 1.44.0 2018-10-30 [1]
AnnotationFilter * 1.6.0 2018-10-30 [1]
assertthat 0.2.0 2017-04-11 [1]
backports 1.1.3 2018-12-14 [1]
bindr 0.1.1 2018-03-13 [1]
bindrcpp * 0.2.2 2018-03-29 [1]
Biobase * 2.42.0 2018-10-30 [1]
BiocGenerics * 0.28.0 2018-10-30 [1]
BiocParallel 1.16.5 2019-01-05 [1]
biomaRt 2.38.0 2018-10-30 [1]
Biostrings * 2.50.2 2019-01-03 [1]
bit 1.1-14 2018-05-29 [1]
bit64 0.9-7 2017-05-08 [1]
bitops 1.0-6 2013-08-17 [1]
blob 1.1.1 2018-03-25 [1]
broom 0.5.1 2018-12-05 [1]
BSgenome * 1.50.0 2018-10-30 [1]
BSgenome.Hsapiens.UCSC.hg19 * 1.4.0 2019-01-10 [1]
callr 3.1.1 2018-12-21 [1]
cellranger 1.1.0 2016-07-27 [1]
cli 1.0.1 2018-09-25 [1]
colorspace 1.3-2 2016-12-14 [1]
crayon 1.3.4 2017-09-16 [1]
crosstalk 1.0.0 2016-12-21 [1]
curl 3.2 2018-03-28 [1]
data.table 1.11.8 2018-09-30 [1]
DBI 1.0.0 2018-05-02 [1]
DelayedArray 0.8.0 2018-10-30 [1]
desc 1.2.0 2018-05-01 [1]
devtools 2.0.1 2018-10-26 [1]
digest 0.6.18 2018-10-10 [1]
dplyr * 0.7.8 2018-11-10 [1]
DT 0.5 2018-11-05 [1]
edgeR * 3.24.3 2019-01-02 [1]
EnsDb.Hsapiens.v86 * 2.99.0 2018-10-22 [1]
ensembldb * 2.6.3 2018-11-21 [1]
evaluate 0.12 2018-10-09 [1]
farver 1.1.0 2018-11-20 [1]
forcats * 0.3.0 2018-02-19 [1]
fs 1.2.6 2018-08-23 [1]
generics 0.0.2 2018-11-29 [1]
GenomeInfoDb * 1.18.1 2018-11-12 [1]
GenomeInfoDbData 1.2.0 2018-11-13 [1]
GenomicAlignments 1.18.1 2019-01-04 [1]
GenomicFeatures * 1.34.1 2018-11-03 [1]
GenomicRanges * 1.34.0 2018-10-30 [1]
getopt 1.20.2 2018-02-16 [1]
ggforce * 0.1.3 2018-07-07 [1]
ggplot2 * 3.1.0 2018-10-25 [1]
glue 1.3.0 2018-07-17 [1]
gtable 0.2.0 2016-02-26 [1]
haven 2.0.0 2018-11-22 [1]
hms 0.4.2 2018-03-10 [1]
htmltools 0.3.6 2017-04-28 [1]
htmlwidgets 1.3 2018-09-30 [1]
httpuv 1.4.5.1 2018-12-18 [1]
httr 1.4.0 2018-12-11 [1]
IRanges * 2.16.0 2018-10-30 [1]
jsonlite 1.6 2018-12-07 [1]
kableExtra * 0.9.0 2018-05-21 [1]
knitr * 1.21 2018-12-10 [1]
labeling 0.3 2014-08-23 [1]
later 0.7.5 2018-09-18 [1]
lattice 0.20-38 2018-11-04 [1]
lazyeval 0.2.1 2017-10-29 [1]
limma * 3.38.3 2018-12-02 [1]
locfit 1.5-9.1 2013-04-20 [1]
lubridate 1.7.4 2018-04-11 [1]
magick * 2.0 2018-10-05 [1]
magrittr 1.5 2014-11-22 [1]
MASS 7.3-51.1 2018-11-01 [1]
Matrix * 1.2-15 2018-11-01 [1]
matrixStats * 0.54.0 2018-07-23 [1]
memoise 1.1.0 2017-04-21 [1]
mime 0.6 2018-10-05 [1]
modelr 0.1.2 2018-05-11 [1]
munsell 0.5.0 2018-06-12 [1]
nlme 3.1-137 2018-04-07 [1]
NOISeq * 2.26.1 2019-01-04 [1]
optparse * 1.6.0 2018-06-17 [1]
pander 0.6.3 2018-11-06 [1]
pdftools 2.0 2018-12-11 [1]
pillar 1.3.1 2018-12-15 [1]
pkgbuild 1.0.2 2018-10-16 [1]
pkgconfig 2.0.2 2018-08-16 [1]
pkgload 1.0.2 2018-10-29 [1]
plotly 4.8.0.9000 2019-01-04 [1]
plyr 1.8.4 2016-06-08 [1]
png 0.1-7 2013-12-03 [1]
preprocessCore * 1.44.0 2018-10-30 [1]
prettyunits 1.0.2 2015-07-13 [1]
processx 3.2.1 2018-12-05 [1]
progress 1.2.0 2018-06-14 [1]
promises 1.0.1 2018-04-13 [1]
ProtGenerics 1.14.0 2018-10-30 [1]
ps 1.3.0 2018-12-21 [1]
purrr * 0.2.5 2018-05-29 [1]
R6 2.3.0 2018-10-04 [1]
rapportools * 1.0 2014-01-07 [1]
RColorBrewer 1.1-2 2014-12-07 [1]
Rcpp 1.0.0 2018-11-07 [1]
RCurl 1.95-4.11 2018-07-15 [1]
readr * 1.3.1 2018-12-21 [1]
readxl 1.2.0 2018-12-19 [1]
remotes 2.0.2 2018-10-30 [1]
reshape * 0.8.8 2018-10-23 [1]
rlang 0.3.1 2019-01-08 [1]
rmarkdown 1.11 2018-12-08 [1]
rprojroot 1.3-2 2018-01-03 [1]
Rsamtools 1.34.0 2018-10-30 [1]
RSQLite 2.1.1 2018-05-06 [1]
rstudioapi 0.9.0 2019-01-09 [1]
rtracklayer * 1.42.1 2018-11-21 [1]
rvest 0.3.2 2016-06-17 [1]
S4Vectors * 0.20.1 2018-11-09 [1]
scales 1.0.0 2018-08-09 [1]
sessioninfo 1.1.1 2018-11-05 [1]
shiny 1.2.0 2018-11-02 [1]
stringi 1.2.4 2018-07-20 [1]
stringr * 1.3.1 2018-05-10 [1]
SummarizedExperiment 1.12.0 2018-10-30 [1]
testthat 2.0.1 2018-10-13 [1]
tibble * 2.0.0 2019-01-04 [1]
tidyr * 0.8.2 2018-10-28 [1]
tidyselect 0.2.5 2018-10-11 [1]
tidyverse * 1.2.1 2017-11-14 [1]
tweenr 1.0.1 2018-12-14 [1]
units 0.6-2 2018-12-05 [1]
usethis 1.4.0 2018-08-14 [1]
viridisLite 0.3.0 2018-02-01 [1]
withr 2.1.2 2018-03-15 [1]
xfun 0.4 2018-10-23 [1]
XML 3.98-1.16 2018-08-19 [1]
xml2 1.2.0 2018-01-24 [1]
xtable 1.8-3 2018-08-29 [1]
XVector * 0.22.0 2018-10-30 [1]
yaml 2.2.0 2018-07-25 [1]
zlibbioc 1.28.0 2018-10-30 [1]
source
Bioconductor
Bioconductor
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
Bioconductor
Bioconductor
Bioconductor
Bioconductor
Bioconductor
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
Bioconductor
Bioconductor
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
Bioconductor
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
Bioconductor
Bioconductor
Bioconductor
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
Bioconductor
Bioconductor
Bioconductor
Bioconductor
Bioconductor
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
Bioconductor
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
Bioconductor
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
Bioconductor
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
Github (ropensci/plotly@c6b8a5d)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
Bioconductor
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
Bioconductor
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.2)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
Bioconductor
CRAN (R 3.5.0)
CRAN (R 3.5.0)
Bioconductor
CRAN (R 3.5.0)
Bioconductor
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
Bioconductor
CRAN (R 3.5.0)
CRAN (R 3.5.2)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
CRAN (R 3.5.0)
Bioconductor
CRAN (R 3.5.0)
Bioconductor
[1] /Library/Frameworks/R.framework/Versions/3.5/Resources/library